sqlglot.parser
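The Parser is usually not instantiated directly; sqlglot's top-level helpers tokenize and parse in one step and delegate to the dialect-specific Parser subclass. A minimal, illustrative sketch (the queries and the duckdb dialect are arbitrary examples, not part of this module):

    import sqlglot

    # Parse a single statement into a syntax tree and round-trip it back to SQL
    tree = sqlglot.parse_one("SELECT a FROM t WHERE b > 1", read="duckdb")
    print(tree.sql(dialect="duckdb"))

    # Parse a multi-statement string; one tree is returned per statement
    for statement in sqlglot.parse("SELECT 1; SELECT 2"):
        print(repr(statement))

The module source follows.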
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 
TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOGRAPHYPOINT, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: 
TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 TokenType.SESSION, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *ALTERABLES, 573 *CREATABLES, 574 *SUBQUERY_PREDICATES, 575 *TYPE_TOKENS, 576 *NO_PAREN_FUNCTIONS, 577 } 578 ID_VAR_TOKENS.remove(TokenType.UNION) 579 580 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 581 TokenType.ANTI, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 
TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 TokenType.INDEX, 623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.UTC_DATE, 647 TokenType.UTC_TIME, 648 TokenType.UTC_TIMESTAMP, 649 TokenType.WINDOW, 650 TokenType.XOR, 651 *TYPE_TOKENS, 652 *SUBQUERY_PREDICATES, 653 } 654 655 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.AND: exp.And, 657 } 658 659 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.COLON_EQ: exp.PropertyEQ, 661 } 662 663 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 664 TokenType.OR: exp.Or, 665 } 666 667 EQUALITY = { 668 TokenType.EQ: exp.EQ, 669 TokenType.NEQ: exp.NEQ, 670 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 671 } 672 673 COMPARISON = { 674 TokenType.GT: exp.GT, 675 TokenType.GTE: exp.GTE, 676 TokenType.LT: exp.LT, 677 TokenType.LTE: exp.LTE, 678 } 679 680 BITWISE = { 681 TokenType.AMP: exp.BitwiseAnd, 682 TokenType.CARET: exp.BitwiseXor, 683 TokenType.PIPE: exp.BitwiseOr, 684 } 685 686 TERM = { 687 TokenType.DASH: exp.Sub, 688 TokenType.PLUS: exp.Add, 689 TokenType.MOD: exp.Mod, 690 TokenType.COLLATE: exp.Collate, 691 } 692 693 FACTOR = { 694 TokenType.DIV: exp.IntDiv, 695 TokenType.LR_ARROW: exp.Distance, 696 TokenType.SLASH: exp.Div, 697 TokenType.STAR: exp.Mul, 698 } 699 700 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 701 702 TIMES = { 703 TokenType.TIME, 704 TokenType.TIMETZ, 705 } 706 707 TIMESTAMPS = { 708 TokenType.TIMESTAMP, 709 TokenType.TIMESTAMPNTZ, 710 TokenType.TIMESTAMPTZ, 711 TokenType.TIMESTAMPLTZ, 712 *TIMES, 713 } 714 715 SET_OPERATIONS = { 716 TokenType.UNION, 717 TokenType.INTERSECT, 718 TokenType.EXCEPT, 719 } 720 721 JOIN_METHODS = { 722 TokenType.ASOF, 723 TokenType.NATURAL, 724 TokenType.POSITIONAL, 725 } 726 727 JOIN_SIDES = { 728 TokenType.LEFT, 729 TokenType.RIGHT, 730 TokenType.FULL, 731 } 732 733 JOIN_KINDS = { 734 TokenType.ANTI, 735 TokenType.CROSS, 736 TokenType.INNER, 737 TokenType.OUTER, 738 TokenType.SEMI, 739 TokenType.STRAIGHT_JOIN, 740 } 741 742 JOIN_HINTS: t.Set[str] = set() 743 744 LAMBDAS = { 745 TokenType.ARROW: lambda self, expressions: self.expression( 746 exp.Lambda, 747 this=self._replace_lambda( 748 self._parse_assignment(), 749 expressions, 750 ), 751 expressions=expressions, 752 ), 753 TokenType.FARROW: lambda self, expressions: self.expression( 754 
exp.Kwarg, 755 this=exp.var(expressions[0].name), 756 expression=self._parse_assignment(), 757 ), 758 } 759 760 COLUMN_OPERATORS = { 761 TokenType.DOT: None, 762 TokenType.DOTCOLON: lambda self, this, to: self.expression( 763 exp.JSONCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.DCOLON: lambda self, this, to: self.build_cast( 768 strict=self.STRICT_CAST, this=this, to=to 769 ), 770 TokenType.ARROW: lambda self, this, path: self.expression( 771 exp.JSONExtract, 772 this=this, 773 expression=self.dialect.to_json_path(path), 774 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 775 ), 776 TokenType.DARROW: lambda self, this, path: self.expression( 777 exp.JSONExtractScalar, 778 this=this, 779 expression=self.dialect.to_json_path(path), 780 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 781 ), 782 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 783 exp.JSONBExtract, 784 this=this, 785 expression=path, 786 ), 787 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 788 exp.JSONBExtractScalar, 789 this=this, 790 expression=path, 791 ), 792 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 793 exp.JSONBContains, 794 this=this, 795 expression=key, 796 ), 797 } 798 799 CAST_COLUMN_OPERATORS = { 800 TokenType.DOTCOLON, 801 TokenType.DCOLON, 802 } 803 804 EXPRESSION_PARSERS = { 805 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 806 exp.Column: lambda self: self._parse_column(), 807 exp.Condition: lambda self: self._parse_assignment(), 808 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 809 exp.Expression: lambda self: self._parse_expression(), 810 exp.From: lambda self: self._parse_from(joins=True), 811 exp.Group: lambda self: self._parse_group(), 812 exp.Having: lambda self: self._parse_having(), 813 exp.Hint: lambda self: self._parse_hint_body(), 814 exp.Identifier: lambda self: self._parse_id_var(), 815 exp.Join: lambda self: self._parse_join(), 816 exp.Lambda: lambda self: self._parse_lambda(), 817 exp.Lateral: lambda self: self._parse_lateral(), 818 exp.Limit: lambda self: self._parse_limit(), 819 exp.Offset: lambda self: self._parse_offset(), 820 exp.Order: lambda self: self._parse_order(), 821 exp.Ordered: lambda self: self._parse_ordered(), 822 exp.Properties: lambda self: self._parse_properties(), 823 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 824 exp.Qualify: lambda self: self._parse_qualify(), 825 exp.Returning: lambda self: self._parse_returning(), 826 exp.Select: lambda self: self._parse_select(), 827 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 828 exp.Table: lambda self: self._parse_table_parts(), 829 exp.TableAlias: lambda self: self._parse_table_alias(), 830 exp.Tuple: lambda self: self._parse_value(values=False), 831 exp.Whens: lambda self: self._parse_when_matched(), 832 exp.Where: lambda self: self._parse_where(), 833 exp.Window: lambda self: self._parse_named_window(), 834 exp.With: lambda self: self._parse_with(), 835 "JOIN_TYPE": lambda self: self._parse_join_parts(), 836 } 837 838 STATEMENT_PARSERS = { 839 TokenType.ALTER: lambda self: self._parse_alter(), 840 TokenType.ANALYZE: lambda self: self._parse_analyze(), 841 TokenType.BEGIN: lambda self: self._parse_transaction(), 842 TokenType.CACHE: lambda self: self._parse_cache(), 843 TokenType.COMMENT: lambda self: self._parse_comment(), 844 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 845 TokenType.COPY: lambda self: 
self._parse_copy(), 846 TokenType.CREATE: lambda self: self._parse_create(), 847 TokenType.DELETE: lambda self: self._parse_delete(), 848 TokenType.DESC: lambda self: self._parse_describe(), 849 TokenType.DESCRIBE: lambda self: self._parse_describe(), 850 TokenType.DROP: lambda self: self._parse_drop(), 851 TokenType.GRANT: lambda self: self._parse_grant(), 852 TokenType.REVOKE: lambda self: self._parse_revoke(), 853 TokenType.INSERT: lambda self: self._parse_insert(), 854 TokenType.KILL: lambda self: self._parse_kill(), 855 TokenType.LOAD: lambda self: self._parse_load(), 856 TokenType.MERGE: lambda self: self._parse_merge(), 857 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 858 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 859 TokenType.REFRESH: lambda self: self._parse_refresh(), 860 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 861 TokenType.SET: lambda self: self._parse_set(), 862 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 863 TokenType.UNCACHE: lambda self: self._parse_uncache(), 864 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 865 TokenType.UPDATE: lambda self: self._parse_update(), 866 TokenType.USE: lambda self: self._parse_use(), 867 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 868 } 869 870 UNARY_PARSERS = { 871 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 872 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 873 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 874 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 875 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 876 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 877 } 878 879 STRING_PARSERS = { 880 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 881 exp.RawString, this=token.text 882 ), 883 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 884 exp.National, this=token.text 885 ), 886 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 887 TokenType.STRING: lambda self, token: self.expression( 888 exp.Literal, this=token.text, is_string=True 889 ), 890 TokenType.UNICODE_STRING: lambda self, token: self.expression( 891 exp.UnicodeString, 892 this=token.text, 893 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 894 ), 895 } 896 897 NUMERIC_PARSERS = { 898 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 899 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 900 TokenType.HEX_STRING: lambda self, token: self.expression( 901 exp.HexString, 902 this=token.text, 903 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 904 ), 905 TokenType.NUMBER: lambda self, token: self.expression( 906 exp.Literal, this=token.text, is_string=False 907 ), 908 } 909 910 PRIMARY_PARSERS = { 911 **STRING_PARSERS, 912 **NUMERIC_PARSERS, 913 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 914 TokenType.NULL: lambda self, _: self.expression(exp.Null), 915 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 916 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 917 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 918 
TokenType.STAR: lambda self, _: self._parse_star_ops(), 919 } 920 921 PLACEHOLDER_PARSERS = { 922 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 923 TokenType.PARAMETER: lambda self: self._parse_parameter(), 924 TokenType.COLON: lambda self: ( 925 self.expression(exp.Placeholder, this=self._prev.text) 926 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 927 else None 928 ), 929 } 930 931 RANGE_PARSERS = { 932 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 933 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 934 TokenType.GLOB: binary_range_parser(exp.Glob), 935 TokenType.ILIKE: binary_range_parser(exp.ILike), 936 TokenType.IN: lambda self, this: self._parse_in(this), 937 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 938 TokenType.IS: lambda self, this: self._parse_is(this), 939 TokenType.LIKE: binary_range_parser(exp.Like), 940 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 941 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 942 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 943 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 944 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 945 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 946 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 947 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 948 } 949 950 PIPE_SYNTAX_TRANSFORM_PARSERS = { 951 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 952 "AS": lambda self, query: self._build_pipe_cte( 953 query, [exp.Star()], self._parse_table_alias() 954 ), 955 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 956 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 957 "ORDER BY": lambda self, query: query.order_by( 958 self._parse_order(), append=False, copy=False 959 ), 960 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 961 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 962 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 963 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 964 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 965 } 966 967 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 968 "ALLOWED_VALUES": lambda self: self.expression( 969 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 970 ), 971 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 972 "AUTO": lambda self: self._parse_auto_property(), 973 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 974 "BACKUP": lambda self: self.expression( 975 exp.BackupProperty, this=self._parse_var(any_token=True) 976 ), 977 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 978 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 979 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 980 "CHECKSUM": lambda self: self._parse_checksum(), 981 "CLUSTER BY": lambda self: self._parse_cluster(), 982 "CLUSTERED": lambda self: self._parse_clustered_by(), 983 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 984 exp.CollateProperty, **kwargs 985 ), 986 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 987 "CONTAINS": lambda self: self._parse_contains_property(), 988 "COPY": lambda self: 
self._parse_copy_property(), 989 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 990 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 991 "DEFINER": lambda self: self._parse_definer(), 992 "DETERMINISTIC": lambda self: self.expression( 993 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 994 ), 995 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 996 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 997 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 998 "DISTKEY": lambda self: self._parse_distkey(), 999 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1000 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1001 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1002 "ENVIRONMENT": lambda self: self.expression( 1003 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1004 ), 1005 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1006 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1007 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1008 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1009 "FREESPACE": lambda self: self._parse_freespace(), 1010 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1011 "HEAP": lambda self: self.expression(exp.HeapProperty), 1012 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1013 "IMMUTABLE": lambda self: self.expression( 1014 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1015 ), 1016 "INHERITS": lambda self: self.expression( 1017 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1018 ), 1019 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1020 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1021 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1022 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1023 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1024 "LIKE": lambda self: self._parse_create_like(), 1025 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1026 "LOCK": lambda self: self._parse_locking(), 1027 "LOCKING": lambda self: self._parse_locking(), 1028 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1029 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1030 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1031 "MODIFIES": lambda self: self._parse_modifies_property(), 1032 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1033 "NO": lambda self: self._parse_no_property(), 1034 "ON": lambda self: self._parse_on_property(), 1035 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1036 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1037 "PARTITION": lambda self: self._parse_partitioned_of(), 1038 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1039 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1040 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1041 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1042 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1043 "READS": lambda self: self._parse_reads_property(), 1044 
"REMOTE": lambda self: self._parse_remote_with_connection(), 1045 "RETURNS": lambda self: self._parse_returns(), 1046 "STRICT": lambda self: self.expression(exp.StrictProperty), 1047 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1048 "ROW": lambda self: self._parse_row(), 1049 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1050 "SAMPLE": lambda self: self.expression( 1051 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1052 ), 1053 "SECURE": lambda self: self.expression(exp.SecureProperty), 1054 "SECURITY": lambda self: self._parse_security(), 1055 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1056 "SETTINGS": lambda self: self._parse_settings_property(), 1057 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1058 "SORTKEY": lambda self: self._parse_sortkey(), 1059 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1060 "STABLE": lambda self: self.expression( 1061 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1062 ), 1063 "STORED": lambda self: self._parse_stored(), 1064 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1065 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1066 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1067 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1068 "TO": lambda self: self._parse_to_table(), 1069 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1070 "TRANSFORM": lambda self: self.expression( 1071 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1072 ), 1073 "TTL": lambda self: self._parse_ttl(), 1074 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1075 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1076 "VOLATILE": lambda self: self._parse_volatile_property(), 1077 "WITH": lambda self: self._parse_with_property(), 1078 } 1079 1080 CONSTRAINT_PARSERS = { 1081 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1082 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1083 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1084 "CHARACTER SET": lambda self: self.expression( 1085 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1086 ), 1087 "CHECK": lambda self: self.expression( 1088 exp.CheckColumnConstraint, 1089 this=self._parse_wrapped(self._parse_assignment), 1090 enforced=self._match_text_seq("ENFORCED"), 1091 ), 1092 "COLLATE": lambda self: self.expression( 1093 exp.CollateColumnConstraint, 1094 this=self._parse_identifier() or self._parse_column(), 1095 ), 1096 "COMMENT": lambda self: self.expression( 1097 exp.CommentColumnConstraint, this=self._parse_string() 1098 ), 1099 "COMPRESS": lambda self: self._parse_compress(), 1100 "CLUSTERED": lambda self: self.expression( 1101 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1102 ), 1103 "NONCLUSTERED": lambda self: self.expression( 1104 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1105 ), 1106 "DEFAULT": lambda self: self.expression( 1107 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1108 ), 1109 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1110 "EPHEMERAL": lambda self: self.expression( 1111 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1112 ), 1113 
"EXCLUDE": lambda self: self.expression( 1114 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1115 ), 1116 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1117 "FORMAT": lambda self: self.expression( 1118 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1119 ), 1120 "GENERATED": lambda self: self._parse_generated_as_identity(), 1121 "IDENTITY": lambda self: self._parse_auto_increment(), 1122 "INLINE": lambda self: self._parse_inline(), 1123 "LIKE": lambda self: self._parse_create_like(), 1124 "NOT": lambda self: self._parse_not_constraint(), 1125 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1126 "ON": lambda self: ( 1127 self._match(TokenType.UPDATE) 1128 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1129 ) 1130 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1131 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1132 "PERIOD": lambda self: self._parse_period_for_system_time(), 1133 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1134 "REFERENCES": lambda self: self._parse_references(match=False), 1135 "TITLE": lambda self: self.expression( 1136 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1137 ), 1138 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1139 "UNIQUE": lambda self: self._parse_unique(), 1140 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1141 "WATERMARK": lambda self: self.expression( 1142 exp.WatermarkColumnConstraint, 1143 this=self._match(TokenType.FOR) and self._parse_column(), 1144 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1145 ), 1146 "WITH": lambda self: self.expression( 1147 exp.Properties, expressions=self._parse_wrapped_properties() 1148 ), 1149 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1150 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 } 1152 1153 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1154 if not self._match(TokenType.L_PAREN, advance=False): 1155 # Partitioning by bucket or truncate follows the syntax: 1156 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1157 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1158 self._retreat(self._index - 1) 1159 return None 1160 1161 klass = ( 1162 exp.PartitionedByBucket 1163 if self._prev.text.upper() == "BUCKET" 1164 else exp.PartitionByTruncate 1165 ) 1166 1167 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1168 this, expression = seq_get(args, 0), seq_get(args, 1) 1169 1170 if isinstance(this, exp.Literal): 1171 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1172 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1173 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1174 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1175 # 1176 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1177 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1178 this, expression = expression, this 1179 1180 return self.expression(klass, this=this, expression=expression) 1181 1182 ALTER_PARSERS = { 1183 "ADD": lambda self: self._parse_alter_table_add(), 1184 "AS": lambda self: self._parse_select(), 1185 "ALTER": lambda self: self._parse_alter_table_alter(), 1186 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1187 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1188 "DROP": lambda self: self._parse_alter_table_drop(), 1189 "RENAME": lambda self: self._parse_alter_table_rename(), 1190 "SET": lambda self: self._parse_alter_table_set(), 1191 "SWAP": lambda self: self.expression( 1192 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1193 ), 1194 } 1195 1196 ALTER_ALTER_PARSERS = { 1197 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1198 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1199 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1200 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1201 } 1202 1203 SCHEMA_UNNAMED_CONSTRAINTS = { 1204 "CHECK", 1205 "EXCLUDE", 1206 "FOREIGN KEY", 1207 "LIKE", 1208 "PERIOD", 1209 "PRIMARY KEY", 1210 "UNIQUE", 1211 "WATERMARK", 1212 "BUCKET", 1213 "TRUNCATE", 1214 } 1215 1216 NO_PAREN_FUNCTION_PARSERS = { 1217 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1218 "CASE": lambda self: self._parse_case(), 1219 "CONNECT_BY_ROOT": lambda self: self.expression( 1220 exp.ConnectByRoot, this=self._parse_column() 1221 ), 1222 "IF": lambda self: self._parse_if(), 1223 } 1224 1225 INVALID_FUNC_NAME_TOKENS = { 1226 TokenType.IDENTIFIER, 1227 TokenType.STRING, 1228 } 1229 1230 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1231 1232 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1233 1234 FUNCTION_PARSERS = { 1235 **{ 1236 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1237 }, 1238 **{ 1239 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1240 }, 1241 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1242 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1243 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1244 "DECODE": lambda self: self._parse_decode(), 1245 "EXTRACT": lambda self: self._parse_extract(), 1246 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1247 "GAP_FILL": lambda self: self._parse_gap_fill(), 1248 "JSON_OBJECT": lambda self: self._parse_json_object(), 1249 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1250 "JSON_TABLE": lambda self: self._parse_json_table(), 1251 "MATCH": lambda self: self._parse_match_against(), 1252 "NORMALIZE": lambda self: self._parse_normalize(), 1253 "OPENJSON": lambda self: self._parse_open_json(), 1254 "OVERLAY": lambda self: self._parse_overlay(), 1255 "POSITION": lambda self: self._parse_position(), 1256 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1257 "STRING_AGG": lambda self: self._parse_string_agg(), 1258 "SUBSTRING": lambda self: self._parse_substring(), 1259 "TRIM": lambda self: self._parse_trim(), 1260 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1261 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1262 "XMLELEMENT": lambda self: self.expression( 1263 exp.XMLElement, 1264 this=self._match_text_seq("NAME") and self._parse_id_var(), 1265 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1266 ), 1267 "XMLTABLE": lambda self: self._parse_xml_table(), 1268 } 1269 1270 QUERY_MODIFIER_PARSERS = { 1271 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1272 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1273 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1274 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1275 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1276 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1277 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1278 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1279 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1280 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1281 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1282 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1283 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1284 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1285 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.CLUSTER_BY: lambda self: ( 1287 "cluster", 1288 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1289 ), 1290 TokenType.DISTRIBUTE_BY: lambda self: ( 1291 "distribute", 1292 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1293 ), 1294 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1295 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1296 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1297 } 1298 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1299 1300 SET_PARSERS = { 1301 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1302 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1303 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1304 "TRANSACTION": lambda self: self._parse_set_transaction(), 1305 } 1306 1307 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1308 1309 TYPE_LITERAL_PARSERS = { 1310 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1311 } 1312 1313 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1314 1315 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1316 1317 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1318 1319 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1320 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1321 "ISOLATION": ( 1322 ("LEVEL", "REPEATABLE", "READ"), 1323 ("LEVEL", "READ", "COMMITTED"), 1324 ("LEVEL", "READ", "UNCOMITTED"), 1325 ("LEVEL", "SERIALIZABLE"), 1326 ), 1327 "READ": ("WRITE", "ONLY"), 1328 } 1329 1330 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1331 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1332 ) 1333 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1334 1335 CREATE_SEQUENCE: OPTIONS_TYPE = { 1336 "SCALE": ("EXTEND", "NOEXTEND"), 1337 "SHARD": ("EXTEND", "NOEXTEND"), 1338 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1339 **dict.fromkeys( 1340 ( 1341 "SESSION", 1342 "GLOBAL", 1343 "KEEP", 1344 "NOKEEP", 1345 "ORDER", 1346 "NOORDER", 1347 "NOCACHE", 1348 "CYCLE", 1349 "NOCYCLE", 1350 "NOMINVALUE", 1351 "NOMAXVALUE", 1352 "NOSCALE", 1353 "NOSHARD", 1354 ), 1355 tuple(), 1356 ), 1357 } 1358 1359 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1360 1361 USABLES: OPTIONS_TYPE = dict.fromkeys( 1362 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1363 ) 1364 1365 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1366 1367 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1368 "TYPE": ("EVOLUTION",), 1369 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1370 } 1371 1372 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1373 1374 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1375 1376 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1377 "NOT": ("ENFORCED",), 1378 "MATCH": ( 1379 "FULL", 1380 "PARTIAL", 1381 "SIMPLE", 1382 ), 1383 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1384 "USING": ( 1385 "BTREE", 1386 "HASH", 1387 ), 1388 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1389 } 1390 1391 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1392 "NO": ("OTHERS",), 1393 "CURRENT": ("ROW",), 1394 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1395 } 1396 1397 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1398 1399 CLONE_KEYWORDS = {"CLONE", "COPY"} 1400 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1401 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1402 1403 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1404 1405 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1406 1407 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1408 1409 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1410 1411 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1412 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1413 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1414 1415 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1416 1417 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1418 1419 ADD_CONSTRAINT_TOKENS = { 1420 TokenType.CONSTRAINT, 1421 TokenType.FOREIGN_KEY, 1422 TokenType.INDEX, 1423 TokenType.KEY, 1424 TokenType.PRIMARY_KEY, 1425 TokenType.UNIQUE, 1426 } 1427 1428 DISTINCT_TOKENS = {TokenType.DISTINCT} 1429 1430 NULL_TOKENS = {TokenType.NULL} 1431 1432 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1433 1434 
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1435 1436 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1437 1438 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1439 1440 ODBC_DATETIME_LITERALS = { 1441 "d": exp.Date, 1442 "t": exp.Time, 1443 "ts": exp.Timestamp, 1444 } 1445 1446 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1447 1448 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1449 1450 # The style options for the DESCRIBE statement 1451 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1452 1453 # The style options for the ANALYZE statement 1454 ANALYZE_STYLES = { 1455 "BUFFER_USAGE_LIMIT", 1456 "FULL", 1457 "LOCAL", 1458 "NO_WRITE_TO_BINLOG", 1459 "SAMPLE", 1460 "SKIP_LOCKED", 1461 "VERBOSE", 1462 } 1463 1464 ANALYZE_EXPRESSION_PARSERS = { 1465 "ALL": lambda self: self._parse_analyze_columns(), 1466 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1467 "DELETE": lambda self: self._parse_analyze_delete(), 1468 "DROP": lambda self: self._parse_analyze_histogram(), 1469 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1470 "LIST": lambda self: self._parse_analyze_list(), 1471 "PREDICATE": lambda self: self._parse_analyze_columns(), 1472 "UPDATE": lambda self: self._parse_analyze_histogram(), 1473 "VALIDATE": lambda self: self._parse_analyze_validate(), 1474 } 1475 1476 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1477 1478 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1479 1480 OPERATION_MODIFIERS: t.Set[str] = set() 1481 1482 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1483 1484 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1485 1486 STRICT_CAST = True 1487 1488 PREFIXED_PIVOT_COLUMNS = False 1489 IDENTIFY_PIVOT_STRINGS = False 1490 1491 LOG_DEFAULTS_TO_LN = False 1492 1493 # Whether the table sample clause expects CSV syntax 1494 TABLESAMPLE_CSV = False 1495 1496 # The default method used for table sampling 1497 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1498 1499 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1500 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1501 1502 # Whether the TRIM function expects the characters to trim as its first argument 1503 TRIM_PATTERN_FIRST = False 1504 1505 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1506 STRING_ALIASES = False 1507 1508 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1509 MODIFIERS_ATTACHED_TO_SET_OP = True 1510 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1511 1512 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1513 NO_PAREN_IF_COMMANDS = True 1514 1515 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1516 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1517 1518 # Whether the `:` operator is used to extract a value from a VARIANT column 1519 COLON_IS_VARIANT_EXTRACT = False 1520 1521 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1522 # If this is True and '(' is not found, the keyword will be treated as an identifier 1523 VALUES_FOLLOWED_BY_PAREN = True 1524 1525 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1526 SUPPORTS_IMPLICIT_UNNEST = False 1527 1528 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1529 INTERVAL_SPANS = True 1530 1531 # Whether a PARTITION clause can follow a table reference 1532 SUPPORTS_PARTITION_SELECTION = False 1533 1534 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1535 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1536 1537 # Whether the 'AS' keyword is optional in the CTE definition syntax 1538 OPTIONAL_ALIAS_TOKEN_CTE = True 1539 1540 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1541 ALTER_RENAME_REQUIRES_COLUMN = True 1542 1543 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1544 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1545 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1546 # as BigQuery, where all joins have the same precedence. 1547 JOINS_HAVE_EQUAL_PRECEDENCE = False 1548 1549 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1550 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1551 1552 # Whether map literals support arbitrary expressions as keys. 1553 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1554 # When False, keys are typically restricted to identifiers. 1555 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1556 1557 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1558 # is true for Snowflake but not for BigQuery which can also process strings 1559 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1560 1561 # Dialects like Databricks support JOINS without join criteria 1562 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1563 ADD_JOIN_ON_TRUE = False 1564 1565 __slots__ = ( 1566 "error_level", 1567 "error_message_context", 1568 "max_errors", 1569 "dialect", 1570 "sql", 1571 "errors", 1572 "_tokens", 1573 "_index", 1574 "_curr", 1575 "_next", 1576 "_prev", 1577 "_prev_comments", 1578 "_pipe_cte_counter", 1579 ) 1580 1581 # Autofilled 1582 SHOW_TRIE: t.Dict = {} 1583 SET_TRIE: t.Dict = {} 1584 1585 def __init__( 1586 self, 1587 error_level: t.Optional[ErrorLevel] = None, 1588 error_message_context: int = 100, 1589 max_errors: int = 3, 1590 dialect: DialectType = None, 1591 ): 1592 from sqlglot.dialects import Dialect 1593 1594 self.error_level = error_level or ErrorLevel.IMMEDIATE 1595 self.error_message_context = error_message_context 1596 self.max_errors = max_errors 1597 self.dialect = Dialect.get_or_raise(dialect) 1598 self.reset() 1599 1600 def reset(self): 1601 self.sql = "" 1602 self.errors = [] 1603 self._tokens = [] 1604 self._index = 0 1605 self._curr = None 1606 self._next = None 1607 self._prev = None 1608 self._prev_comments = None 1609 self._pipe_cte_counter = 0 1610 1611 def parse( 1612 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1613 ) -> t.List[t.Optional[exp.Expression]]: 1614 """ 1615 Parses a list of tokens and returns a list of syntax trees, one tree 1616 per parsed SQL statement. 1617 1618 Args: 1619 raw_tokens: The list of tokens. 1620 sql: The original SQL string, used to produce helpful debug messages. 1621 1622 Returns: 1623 The list of the produced syntax trees. 
1624 """ 1625 return self._parse( 1626 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1627 ) 1628 1629 def parse_into( 1630 self, 1631 expression_types: exp.IntoType, 1632 raw_tokens: t.List[Token], 1633 sql: t.Optional[str] = None, 1634 ) -> t.List[t.Optional[exp.Expression]]: 1635 """ 1636 Parses a list of tokens into a given Expression type. If a collection of Expression 1637 types is given instead, this method will try to parse the token list into each one 1638 of them, stopping at the first for which the parsing succeeds. 1639 1640 Args: 1641 expression_types: The expression type(s) to try and parse the token list into. 1642 raw_tokens: The list of tokens. 1643 sql: The original SQL string, used to produce helpful debug messages. 1644 1645 Returns: 1646 The target Expression. 1647 """ 1648 errors = [] 1649 for expression_type in ensure_list(expression_types): 1650 parser = self.EXPRESSION_PARSERS.get(expression_type) 1651 if not parser: 1652 raise TypeError(f"No parser registered for {expression_type}") 1653 1654 try: 1655 return self._parse(parser, raw_tokens, sql) 1656 except ParseError as e: 1657 e.errors[0]["into_expression"] = expression_type 1658 errors.append(e) 1659 1660 raise ParseError( 1661 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1662 errors=merge_errors(errors), 1663 ) from errors[-1] 1664 1665 def _parse( 1666 self, 1667 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1668 raw_tokens: t.List[Token], 1669 sql: t.Optional[str] = None, 1670 ) -> t.List[t.Optional[exp.Expression]]: 1671 self.reset() 1672 self.sql = sql or "" 1673 1674 total = len(raw_tokens) 1675 chunks: t.List[t.List[Token]] = [[]] 1676 1677 for i, token in enumerate(raw_tokens): 1678 if token.token_type == TokenType.SEMICOLON: 1679 if token.comments: 1680 chunks.append([token]) 1681 1682 if i < total - 1: 1683 chunks.append([]) 1684 else: 1685 chunks[-1].append(token) 1686 1687 expressions = [] 1688 1689 for tokens in chunks: 1690 self._index = -1 1691 self._tokens = tokens 1692 self._advance() 1693 1694 expressions.append(parse_method(self)) 1695 1696 if self._index < len(self._tokens): 1697 self.raise_error("Invalid expression / Unexpected token") 1698 1699 self.check_errors() 1700 1701 return expressions 1702 1703 def check_errors(self) -> None: 1704 """Logs or raises any found errors, depending on the chosen error level setting.""" 1705 if self.error_level == ErrorLevel.WARN: 1706 for error in self.errors: 1707 logger.error(str(error)) 1708 elif self.error_level == ErrorLevel.RAISE and self.errors: 1709 raise ParseError( 1710 concat_messages(self.errors, self.max_errors), 1711 errors=merge_errors(self.errors), 1712 ) 1713 1714 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1715 """ 1716 Appends an error in the list of recorded errors or raises it, depending on the chosen 1717 error level setting. 1718 """ 1719 token = token or self._curr or self._prev or Token.string("") 1720 start = token.start 1721 end = token.end + 1 1722 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1723 highlight = self.sql[start:end] 1724 end_context = self.sql[end : end + self.error_message_context] 1725 1726 error = ParseError.new( 1727 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
1728 f" {start_context}\033[4m{highlight}\033[0m{end_context}",
1729 description=message,
1730 line=token.line,
1731 col=token.col,
1732 start_context=start_context,
1733 highlight=highlight,
1734 end_context=end_context,
1735 )
1736
1737 if self.error_level == ErrorLevel.IMMEDIATE:
1738 raise error
1739
1740 self.errors.append(error)
1741
1742 def expression(
1743 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1744 ) -> E:
1745 """
1746 Creates a new, validated Expression.
1747
1748 Args:
1749 exp_class: The expression class to instantiate.
1750 comments: An optional list of comments to attach to the expression.
1751 kwargs: The arguments to set for the expression along with their respective values.
1752
1753 Returns:
1754 The target expression.
1755 """
1756 instance = exp_class(**kwargs)
1757 instance.add_comments(comments) if comments else self._add_comments(instance)
1758 return self.validate_expression(instance)
1759
1760 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1761 if expression and self._prev_comments:
1762 expression.add_comments(self._prev_comments)
1763 self._prev_comments = None
1764
1765 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1766 """
1767 Validates an Expression, making sure that all its mandatory arguments are set.
1768
1769 Args:
1770 expression: The expression to validate.
1771 args: An optional list of items that was used to instantiate the expression, if it's a Func.
1772
1773 Returns:
1774 The validated expression.
1775 """
1776 if self.error_level != ErrorLevel.IGNORE:
1777 for error_message in expression.error_messages(args):
1778 self.raise_error(error_message)
1779
1780 return expression
1781
1782 def _find_sql(self, start: Token, end: Token) -> str:
1783 return self.sql[start.start : end.end + 1]
1784
1785 def _is_connected(self) -> bool:
1786 return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1787
1788 def _advance(self, times: int = 1) -> None:
1789 self._index += times
1790 self._curr = seq_get(self._tokens, self._index)
1791 self._next = seq_get(self._tokens, self._index + 1)
1792
1793 if self._index > 0:
1794 self._prev = self._tokens[self._index - 1]
1795 self._prev_comments = self._prev.comments
1796 else:
1797 self._prev = None
1798 self._prev_comments = None
1799
1800 def _retreat(self, index: int) -> None:
1801 if index != self._index:
1802 self._advance(index - self._index)
1803
1804 def _warn_unsupported(self) -> None:
1805 if len(self._tokens) <= 1:
1806 return
1807
1808 # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1809 # interested in emitting a warning for the one being currently processed.
1810 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1811
1812 logger.warning(
1813 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1814 )
1815
1816 def _parse_command(self) -> exp.Command:
1817 self._warn_unsupported()
1818 return self.expression(
1819 exp.Command,
1820 comments=self._prev_comments,
1821 this=self._prev.text.upper(),
1822 expression=self._parse_string(),
1823 )
1824
1825 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1826 """
1827 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1828 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1829 solve this by setting & resetting the parser state accordingly.
1830 """
1831 index = self._index
1832 error_level = self.error_level
1833
1834 self.error_level = ErrorLevel.IMMEDIATE
1835 try:
1836 this = parse_method()
1837 except ParseError:
1838 this = None
1839 finally:
1840 if not this or retreat:
1841 self._retreat(index)
1842 self.error_level = error_level
1843
1844 return this
1845
1846 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1847 start = self._prev
1848 exists = self._parse_exists() if allow_exists else None
1849
1850 self._match(TokenType.ON)
1851
1852 materialized = self._match_text_seq("MATERIALIZED")
1853 kind = self._match_set(self.CREATABLES) and self._prev
1854 if not kind:
1855 return self._parse_as_command(start)
1856
1857 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1858 this = self._parse_user_defined_function(kind=kind.token_type)
1859 elif kind.token_type == TokenType.TABLE:
1860 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1861 elif kind.token_type == TokenType.COLUMN:
1862 this = self._parse_column()
1863 else:
1864 this = self._parse_id_var()
1865
1866 self._match(TokenType.IS)
1867
1868 return self.expression(
1869 exp.Comment,
1870 this=this,
1871 kind=kind.text,
1872 expression=self._parse_string(),
1873 exists=exists,
1874 materialized=materialized,
1875 )
1876
1877 def _parse_to_table(
1878 self,
1879 ) -> exp.ToTableProperty:
1880 table = self._parse_table_parts(schema=True)
1881 return self.expression(exp.ToTableProperty, this=table)
1882
1883 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1884 def _parse_ttl(self) -> exp.Expression:
1885 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1886 this = self._parse_bitwise()
1887
1888 if self._match_text_seq("DELETE"):
1889 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1890 if self._match_text_seq("RECOMPRESS"):
1891 return self.expression(
1892 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1893 )
1894 if self._match_text_seq("TO", "DISK"):
1895 return self.expression(
1896 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1897 )
1898 if self._match_text_seq("TO", "VOLUME"):
1899 return self.expression(
1900 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1901 )
1902
1903 return this
1904
1905 expressions = self._parse_csv(_parse_ttl_action)
1906 where = self._parse_where()
1907 group = self._parse_group()
1908
1909 aggregates = None
1910 if group and self._match(TokenType.SET):
1911 aggregates = self._parse_csv(self._parse_set_item)
1912
1913 return self.expression(
1914 exp.MergeTreeTTL,
1915 expressions=expressions,
1916 where=where,
1917 group=group,
1918 aggregates=aggregates,
1919 )
1920
1921 def _parse_statement(self) -> t.Optional[exp.Expression]:
1922 if self._curr is None:
1923 return None
1924
1925 if self._match_set(self.STATEMENT_PARSERS):
1926 comments = self._prev_comments
1927 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1928 stmt.add_comments(comments, prepend=True)
1929 return stmt
1930
1931 if self._match_set(self.dialect.tokenizer_class.COMMANDS):
1932 return self._parse_command()
1933
1934 expression = self._parse_expression()
1935 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1936 return
self._parse_query_modifiers(expression) 1937 1938 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1939 start = self._prev 1940 temporary = self._match(TokenType.TEMPORARY) 1941 materialized = self._match_text_seq("MATERIALIZED") 1942 1943 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1944 if not kind: 1945 return self._parse_as_command(start) 1946 1947 concurrently = self._match_text_seq("CONCURRENTLY") 1948 if_exists = exists or self._parse_exists() 1949 1950 if kind == "COLUMN": 1951 this = self._parse_column() 1952 else: 1953 this = self._parse_table_parts( 1954 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1955 ) 1956 1957 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1958 1959 if self._match(TokenType.L_PAREN, advance=False): 1960 expressions = self._parse_wrapped_csv(self._parse_types) 1961 else: 1962 expressions = None 1963 1964 return self.expression( 1965 exp.Drop, 1966 exists=if_exists, 1967 this=this, 1968 expressions=expressions, 1969 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1970 temporary=temporary, 1971 materialized=materialized, 1972 cascade=self._match_text_seq("CASCADE"), 1973 constraints=self._match_text_seq("CONSTRAINTS"), 1974 purge=self._match_text_seq("PURGE"), 1975 cluster=cluster, 1976 concurrently=concurrently, 1977 ) 1978 1979 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1980 return ( 1981 self._match_text_seq("IF") 1982 and (not not_ or self._match(TokenType.NOT)) 1983 and self._match(TokenType.EXISTS) 1984 ) 1985 1986 def _parse_create(self) -> exp.Create | exp.Command: 1987 # Note: this can't be None because we've matched a statement parser 1988 start = self._prev 1989 1990 replace = ( 1991 start.token_type == TokenType.REPLACE 1992 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1993 or self._match_pair(TokenType.OR, TokenType.ALTER) 1994 ) 1995 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1996 1997 unique = self._match(TokenType.UNIQUE) 1998 1999 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2000 clustered = True 2001 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2002 "COLUMNSTORE" 2003 ): 2004 clustered = False 2005 else: 2006 clustered = None 2007 2008 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2009 self._advance() 2010 2011 properties = None 2012 create_token = self._match_set(self.CREATABLES) and self._prev 2013 2014 if not create_token: 2015 # exp.Properties.Location.POST_CREATE 2016 properties = self._parse_properties() 2017 create_token = self._match_set(self.CREATABLES) and self._prev 2018 2019 if not properties or not create_token: 2020 return self._parse_as_command(start) 2021 2022 concurrently = self._match_text_seq("CONCURRENTLY") 2023 exists = self._parse_exists(not_=True) 2024 this = None 2025 expression: t.Optional[exp.Expression] = None 2026 indexes = None 2027 no_schema_binding = None 2028 begin = None 2029 end = None 2030 clone = None 2031 2032 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2033 nonlocal properties 2034 if properties and temp_props: 2035 properties.expressions.extend(temp_props.expressions) 2036 elif temp_props: 2037 properties = temp_props 2038 2039 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2040 this = self._parse_user_defined_function(kind=create_token.token_type) 2041 2042 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature)
2043 extend_props(self._parse_properties())
2044
2045 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
2046 extend_props(self._parse_properties())
2047
2048 if not expression:
2049 if self._match(TokenType.COMMAND):
2050 expression = self._parse_as_command(self._prev)
2051 else:
2052 begin = self._match(TokenType.BEGIN)
2053 return_ = self._match_text_seq("RETURN")
2054
2055 if self._match(TokenType.STRING, advance=False):
2056 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
2057 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
2058 expression = self._parse_string()
2059 extend_props(self._parse_properties())
2060 else:
2061 expression = self._parse_user_defined_function_expression()
2062
2063 end = self._match_text_seq("END")
2064
2065 if return_:
2066 expression = self.expression(exp.Return, this=expression)
2067 elif create_token.token_type == TokenType.INDEX:
2068 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
2069 if not self._match(TokenType.ON):
2070 index = self._parse_id_var()
2071 anonymous = False
2072 else:
2073 index = None
2074 anonymous = True
2075
2076 this = self._parse_index(index=index, anonymous=anonymous)
2077 elif create_token.token_type in self.DB_CREATABLES:
2078 table_parts = self._parse_table_parts(
2079 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2080 )
2081
2082 # exp.Properties.Location.POST_NAME
2083 self._match(TokenType.COMMA)
2084 extend_props(self._parse_properties(before=True))
2085
2086 this = self._parse_schema(this=table_parts)
2087
2088 # exp.Properties.Location.POST_SCHEMA and POST_WITH
2089 extend_props(self._parse_properties())
2090
2091 has_alias = self._match(TokenType.ALIAS)
2092 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2093 # exp.Properties.Location.POST_ALIAS
2094 extend_props(self._parse_properties())
2095
2096 if create_token.token_type == TokenType.SEQUENCE:
2097 expression = self._parse_types()
2098 props = self._parse_properties()
2099 if props:
2100 sequence_props = exp.SequenceProperties()
2101 options = []
2102 for prop in props:
2103 if isinstance(prop, exp.SequenceProperties):
2104 for arg, value in prop.args.items():
2105 if arg == "options":
2106 options.extend(value)
2107 else:
2108 sequence_props.set(arg, value)
2109 prop.pop()
2110
2111 if options:
2112 sequence_props.set("options", options)
2113
2114 props.append("expressions", sequence_props)
2115 extend_props(props)
2116 else:
2117 expression = self._parse_ddl_select()
2118
2119 # Some dialects also support using a table as an alias instead of a SELECT.
2120 # Here we fall back to this as an alternative.
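# For instance (illustrative; exact dialect coverage varies), a Teradata-style
# CREATE TABLE t2 AS t1 WITH DATA puts a table reference after AS instead of a SELECT,
# which is picked up by the _parse_table_parts retry below.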
2121 if not expression and has_alias: 2122 expression = self._try_parse(self._parse_table_parts) 2123 2124 if create_token.token_type == TokenType.TABLE: 2125 # exp.Properties.Location.POST_EXPRESSION 2126 extend_props(self._parse_properties()) 2127 2128 indexes = [] 2129 while True: 2130 index = self._parse_index() 2131 2132 # exp.Properties.Location.POST_INDEX 2133 extend_props(self._parse_properties()) 2134 if not index: 2135 break 2136 else: 2137 self._match(TokenType.COMMA) 2138 indexes.append(index) 2139 elif create_token.token_type == TokenType.VIEW: 2140 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2141 no_schema_binding = True 2142 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2143 extend_props(self._parse_properties()) 2144 2145 shallow = self._match_text_seq("SHALLOW") 2146 2147 if self._match_texts(self.CLONE_KEYWORDS): 2148 copy = self._prev.text.lower() == "copy" 2149 clone = self.expression( 2150 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2151 ) 2152 2153 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2154 return self._parse_as_command(start) 2155 2156 create_kind_text = create_token.text.upper() 2157 return self.expression( 2158 exp.Create, 2159 this=this, 2160 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2161 replace=replace, 2162 refresh=refresh, 2163 unique=unique, 2164 expression=expression, 2165 exists=exists, 2166 properties=properties, 2167 indexes=indexes, 2168 no_schema_binding=no_schema_binding, 2169 begin=begin, 2170 end=end, 2171 clone=clone, 2172 concurrently=concurrently, 2173 clustered=clustered, 2174 ) 2175 2176 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2177 seq = exp.SequenceProperties() 2178 2179 options = [] 2180 index = self._index 2181 2182 while self._curr: 2183 self._match(TokenType.COMMA) 2184 if self._match_text_seq("INCREMENT"): 2185 self._match_text_seq("BY") 2186 self._match_text_seq("=") 2187 seq.set("increment", self._parse_term()) 2188 elif self._match_text_seq("MINVALUE"): 2189 seq.set("minvalue", self._parse_term()) 2190 elif self._match_text_seq("MAXVALUE"): 2191 seq.set("maxvalue", self._parse_term()) 2192 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2193 self._match_text_seq("=") 2194 seq.set("start", self._parse_term()) 2195 elif self._match_text_seq("CACHE"): 2196 # T-SQL allows empty CACHE which is initialized dynamically 2197 seq.set("cache", self._parse_number() or True) 2198 elif self._match_text_seq("OWNED", "BY"): 2199 # "OWNED BY NONE" is the default 2200 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2201 else: 2202 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2203 if opt: 2204 options.append(opt) 2205 else: 2206 break 2207 2208 seq.set("options", options if options else None) 2209 return None if self._index == index else seq 2210 2211 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2212 # only used for teradata currently 2213 self._match(TokenType.COMMA) 2214 2215 kwargs = { 2216 "no": self._match_text_seq("NO"), 2217 "dual": self._match_text_seq("DUAL"), 2218 "before": self._match_text_seq("BEFORE"), 2219 "default": self._match_text_seq("DEFAULT"), 2220 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2221 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2222 "after": self._match_text_seq("AFTER"), 2223 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2224 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2225 } 2226 2227 if self._match_texts(self.PROPERTY_PARSERS): 2228 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2229 try: 2230 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2231 except TypeError: 2232 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2233 2234 return None 2235 2236 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2237 return self._parse_wrapped_csv(self._parse_property) 2238 2239 def _parse_property(self) -> t.Optional[exp.Expression]: 2240 if self._match_texts(self.PROPERTY_PARSERS): 2241 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2242 2243 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2244 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2245 2246 if self._match_text_seq("COMPOUND", "SORTKEY"): 2247 return self._parse_sortkey(compound=True) 2248 2249 if self._match_text_seq("SQL", "SECURITY"): 2250 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2251 2252 index = self._index 2253 2254 seq_props = self._parse_sequence_properties() 2255 if seq_props: 2256 return seq_props 2257 2258 self._retreat(index) 2259 key = self._parse_column() 2260 2261 if not self._match(TokenType.EQ): 2262 self._retreat(index) 2263 return None 2264 2265 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2266 if isinstance(key, exp.Column): 2267 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2268 2269 value = self._parse_bitwise() or self._parse_var(any_token=True) 2270 2271 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2272 if isinstance(value, exp.Column): 2273 value = exp.var(value.name) 2274 2275 return self.expression(exp.Property, this=key, value=value) 2276 2277 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2278 if self._match_text_seq("BY"): 2279 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2280 2281 self._match(TokenType.ALIAS) 2282 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2283 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2284 2285 return self.expression( 2286 exp.FileFormatProperty, 2287 this=( 2288 self.expression( 2289 exp.InputOutputFormat, 2290 input_format=input_format, 2291 output_format=output_format, 2292 ) 2293 if input_format or output_format 2294 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2295 ), 2296 hive_format=True, 2297 ) 2298 2299 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2300 field = self._parse_field() 2301 if isinstance(field, exp.Identifier) and not field.quoted: 2302 field = exp.var(field) 2303 2304 return field 2305 2306 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2307 self._match(TokenType.EQ) 2308 self._match(TokenType.ALIAS) 2309 2310 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2311 2312 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2313 properties = [] 2314 while True: 2315 if before: 2316 prop = self._parse_property_before() 2317 else: 2318 prop = self._parse_property() 2319 if not prop: 2320 break 2321 for p in ensure_list(prop): 2322 properties.append(p) 
2323 2324 if properties: 2325 return self.expression(exp.Properties, expressions=properties) 2326 2327 return None 2328 2329 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2330 return self.expression( 2331 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2332 ) 2333 2334 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2335 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2336 security_specifier = self._prev.text.upper() 2337 return self.expression(exp.SecurityProperty, this=security_specifier) 2338 return None 2339 2340 def _parse_settings_property(self) -> exp.SettingsProperty: 2341 return self.expression( 2342 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2343 ) 2344 2345 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2346 if self._index >= 2: 2347 pre_volatile_token = self._tokens[self._index - 2] 2348 else: 2349 pre_volatile_token = None 2350 2351 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2352 return exp.VolatileProperty() 2353 2354 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2355 2356 def _parse_retention_period(self) -> exp.Var: 2357 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2358 number = self._parse_number() 2359 number_str = f"{number} " if number else "" 2360 unit = self._parse_var(any_token=True) 2361 return exp.var(f"{number_str}{unit}") 2362 2363 def _parse_system_versioning_property( 2364 self, with_: bool = False 2365 ) -> exp.WithSystemVersioningProperty: 2366 self._match(TokenType.EQ) 2367 prop = self.expression( 2368 exp.WithSystemVersioningProperty, 2369 **{ # type: ignore 2370 "on": True, 2371 "with": with_, 2372 }, 2373 ) 2374 2375 if self._match_text_seq("OFF"): 2376 prop.set("on", False) 2377 return prop 2378 2379 self._match(TokenType.ON) 2380 if self._match(TokenType.L_PAREN): 2381 while self._curr and not self._match(TokenType.R_PAREN): 2382 if self._match_text_seq("HISTORY_TABLE", "="): 2383 prop.set("this", self._parse_table_parts()) 2384 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2385 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2386 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2387 prop.set("retention_period", self._parse_retention_period()) 2388 2389 self._match(TokenType.COMMA) 2390 2391 return prop 2392 2393 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2394 self._match(TokenType.EQ) 2395 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2396 prop = self.expression(exp.DataDeletionProperty, on=on) 2397 2398 if self._match(TokenType.L_PAREN): 2399 while self._curr and not self._match(TokenType.R_PAREN): 2400 if self._match_text_seq("FILTER_COLUMN", "="): 2401 prop.set("filter_column", self._parse_column()) 2402 elif self._match_text_seq("RETENTION_PERIOD", "="): 2403 prop.set("retention_period", self._parse_retention_period()) 2404 2405 self._match(TokenType.COMMA) 2406 2407 return prop 2408 2409 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2410 kind = "HASH" 2411 expressions: t.Optional[t.List[exp.Expression]] = None 2412 if self._match_text_seq("BY", "HASH"): 2413 expressions = self._parse_wrapped_csv(self._parse_id_var) 2414 elif self._match_text_seq("BY", "RANDOM"): 2415 kind = "RANDOM" 2416 2417 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2418 
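# Illustrative shapes of the clause handled here (assuming Doris/StarRocks-style DDL):
#   DISTRIBUTED BY HASH(k1, k2) BUCKETS 32
#   DISTRIBUTED BY RANDOM BUCKETS AUTO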
buckets: t.Optional[exp.Expression] = None 2419 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2420 buckets = self._parse_number() 2421 2422 return self.expression( 2423 exp.DistributedByProperty, 2424 expressions=expressions, 2425 kind=kind, 2426 buckets=buckets, 2427 order=self._parse_order(), 2428 ) 2429 2430 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2431 self._match_text_seq("KEY") 2432 expressions = self._parse_wrapped_id_vars() 2433 return self.expression(expr_type, expressions=expressions) 2434 2435 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2436 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2437 prop = self._parse_system_versioning_property(with_=True) 2438 self._match_r_paren() 2439 return prop 2440 2441 if self._match(TokenType.L_PAREN, advance=False): 2442 return self._parse_wrapped_properties() 2443 2444 if self._match_text_seq("JOURNAL"): 2445 return self._parse_withjournaltable() 2446 2447 if self._match_texts(self.VIEW_ATTRIBUTES): 2448 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2449 2450 if self._match_text_seq("DATA"): 2451 return self._parse_withdata(no=False) 2452 elif self._match_text_seq("NO", "DATA"): 2453 return self._parse_withdata(no=True) 2454 2455 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2456 return self._parse_serde_properties(with_=True) 2457 2458 if self._match(TokenType.SCHEMA): 2459 return self.expression( 2460 exp.WithSchemaBindingProperty, 2461 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2462 ) 2463 2464 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2465 return self.expression( 2466 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2467 ) 2468 2469 if not self._next: 2470 return None 2471 2472 return self._parse_withisolatedloading() 2473 2474 def _parse_procedure_option(self) -> exp.Expression | None: 2475 if self._match_text_seq("EXECUTE", "AS"): 2476 return self.expression( 2477 exp.ExecuteAsProperty, 2478 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2479 or self._parse_string(), 2480 ) 2481 2482 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2483 2484 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2485 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2486 self._match(TokenType.EQ) 2487 2488 user = self._parse_id_var() 2489 self._match(TokenType.PARAMETER) 2490 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2491 2492 if not user or not host: 2493 return None 2494 2495 return exp.DefinerProperty(this=f"{user}@{host}") 2496 2497 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2498 self._match(TokenType.TABLE) 2499 self._match(TokenType.EQ) 2500 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2501 2502 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2503 return self.expression(exp.LogProperty, no=no) 2504 2505 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2506 return self.expression(exp.JournalProperty, **kwargs) 2507 2508 def _parse_checksum(self) -> exp.ChecksumProperty: 2509 self._match(TokenType.EQ) 2510 2511 on = None 2512 if self._match(TokenType.ON): 2513 on = True 2514 elif self._match_text_seq("OFF"): 2515 on = False 2516 2517 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2518 2519 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2520 return self.expression( 2521 exp.Cluster, 2522 expressions=( 2523 self._parse_wrapped_csv(self._parse_ordered) 2524 if wrapped 2525 else self._parse_csv(self._parse_ordered) 2526 ), 2527 ) 2528 2529 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2530 self._match_text_seq("BY") 2531 2532 self._match_l_paren() 2533 expressions = self._parse_csv(self._parse_column) 2534 self._match_r_paren() 2535 2536 if self._match_text_seq("SORTED", "BY"): 2537 self._match_l_paren() 2538 sorted_by = self._parse_csv(self._parse_ordered) 2539 self._match_r_paren() 2540 else: 2541 sorted_by = None 2542 2543 self._match(TokenType.INTO) 2544 buckets = self._parse_number() 2545 self._match_text_seq("BUCKETS") 2546 2547 return self.expression( 2548 exp.ClusteredByProperty, 2549 expressions=expressions, 2550 sorted_by=sorted_by, 2551 buckets=buckets, 2552 ) 2553 2554 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2555 if not self._match_text_seq("GRANTS"): 2556 self._retreat(self._index - 1) 2557 return None 2558 2559 return self.expression(exp.CopyGrantsProperty) 2560 2561 def _parse_freespace(self) -> exp.FreespaceProperty: 2562 self._match(TokenType.EQ) 2563 return self.expression( 2564 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2565 ) 2566 2567 def _parse_mergeblockratio( 2568 self, no: bool = False, default: bool = False 2569 ) -> exp.MergeBlockRatioProperty: 2570 if self._match(TokenType.EQ): 2571 return self.expression( 2572 exp.MergeBlockRatioProperty, 2573 this=self._parse_number(), 2574 percent=self._match(TokenType.PERCENT), 2575 ) 2576 2577 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2578 2579 def _parse_datablocksize( 2580 self, 2581 default: t.Optional[bool] = None, 2582 minimum: t.Optional[bool] = None, 2583 maximum: t.Optional[bool] = None, 2584 ) -> exp.DataBlocksizeProperty: 2585 self._match(TokenType.EQ) 2586 size = self._parse_number() 2587 2588 units = None 2589 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2590 units = self._prev.text 2591 2592 return self.expression( 2593 exp.DataBlocksizeProperty, 2594 size=size, 2595 units=units, 2596 default=default, 2597 minimum=minimum, 2598 maximum=maximum, 2599 ) 2600 2601 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2602 self._match(TokenType.EQ) 2603 always = self._match_text_seq("ALWAYS") 2604 manual = self._match_text_seq("MANUAL") 2605 never = self._match_text_seq("NEVER") 2606 default = self._match_text_seq("DEFAULT") 2607 2608 autotemp = None 2609 if self._match_text_seq("AUTOTEMP"): 2610 autotemp = self._parse_schema() 2611 2612 return self.expression( 2613 exp.BlockCompressionProperty, 2614 always=always, 2615 manual=manual, 2616 never=never, 2617 default=default, 2618 autotemp=autotemp, 2619 ) 2620 2621 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2622 index = self._index 2623 no = self._match_text_seq("NO") 2624 concurrent = self._match_text_seq("CONCURRENT") 2625 2626 if not self._match_text_seq("ISOLATED", "LOADING"): 2627 self._retreat(index) 2628 return None 2629 2630 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2631 return self.expression( 2632 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2633 ) 2634 2635 def _parse_locking(self) -> exp.LockingProperty: 2636 if self._match(TokenType.TABLE): 2637 kind = "TABLE" 2638 elif 
self._match(TokenType.VIEW): 2639 kind = "VIEW" 2640 elif self._match(TokenType.ROW): 2641 kind = "ROW" 2642 elif self._match_text_seq("DATABASE"): 2643 kind = "DATABASE" 2644 else: 2645 kind = None 2646 2647 if kind in ("DATABASE", "TABLE", "VIEW"): 2648 this = self._parse_table_parts() 2649 else: 2650 this = None 2651 2652 if self._match(TokenType.FOR): 2653 for_or_in = "FOR" 2654 elif self._match(TokenType.IN): 2655 for_or_in = "IN" 2656 else: 2657 for_or_in = None 2658 2659 if self._match_text_seq("ACCESS"): 2660 lock_type = "ACCESS" 2661 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2662 lock_type = "EXCLUSIVE" 2663 elif self._match_text_seq("SHARE"): 2664 lock_type = "SHARE" 2665 elif self._match_text_seq("READ"): 2666 lock_type = "READ" 2667 elif self._match_text_seq("WRITE"): 2668 lock_type = "WRITE" 2669 elif self._match_text_seq("CHECKSUM"): 2670 lock_type = "CHECKSUM" 2671 else: 2672 lock_type = None 2673 2674 override = self._match_text_seq("OVERRIDE") 2675 2676 return self.expression( 2677 exp.LockingProperty, 2678 this=this, 2679 kind=kind, 2680 for_or_in=for_or_in, 2681 lock_type=lock_type, 2682 override=override, 2683 ) 2684 2685 def _parse_partition_by(self) -> t.List[exp.Expression]: 2686 if self._match(TokenType.PARTITION_BY): 2687 return self._parse_csv(self._parse_assignment) 2688 return [] 2689 2690 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2691 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2692 if self._match_text_seq("MINVALUE"): 2693 return exp.var("MINVALUE") 2694 if self._match_text_seq("MAXVALUE"): 2695 return exp.var("MAXVALUE") 2696 return self._parse_bitwise() 2697 2698 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2699 expression = None 2700 from_expressions = None 2701 to_expressions = None 2702 2703 if self._match(TokenType.IN): 2704 this = self._parse_wrapped_csv(self._parse_bitwise) 2705 elif self._match(TokenType.FROM): 2706 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2707 self._match_text_seq("TO") 2708 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2709 elif self._match_text_seq("WITH", "(", "MODULUS"): 2710 this = self._parse_number() 2711 self._match_text_seq(",", "REMAINDER") 2712 expression = self._parse_number() 2713 self._match_r_paren() 2714 else: 2715 self.raise_error("Failed to parse partition bound spec.") 2716 2717 return self.expression( 2718 exp.PartitionBoundSpec, 2719 this=this, 2720 expression=expression, 2721 from_expressions=from_expressions, 2722 to_expressions=to_expressions, 2723 ) 2724 2725 # https://www.postgresql.org/docs/current/sql-createtable.html 2726 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2727 if not self._match_text_seq("OF"): 2728 self._retreat(self._index - 1) 2729 return None 2730 2731 this = self._parse_table(schema=True) 2732 2733 if self._match(TokenType.DEFAULT): 2734 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2735 elif self._match_text_seq("FOR", "VALUES"): 2736 expression = self._parse_partition_bound_spec() 2737 else: 2738 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2739 2740 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2741 2742 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2743 self._match(TokenType.EQ) 2744 return self.expression( 2745 exp.PartitionedByProperty, 2746 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2747 ) 2748 2749 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2750 if self._match_text_seq("AND", "STATISTICS"): 2751 statistics = True 2752 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2753 statistics = False 2754 else: 2755 statistics = None 2756 2757 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2758 2759 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2760 if self._match_text_seq("SQL"): 2761 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2762 return None 2763 2764 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2765 if self._match_text_seq("SQL", "DATA"): 2766 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2767 return None 2768 2769 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2770 if self._match_text_seq("PRIMARY", "INDEX"): 2771 return exp.NoPrimaryIndexProperty() 2772 if self._match_text_seq("SQL"): 2773 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2774 return None 2775 2776 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2777 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2778 return exp.OnCommitProperty() 2779 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2780 return exp.OnCommitProperty(delete=True) 2781 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2782 2783 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2784 if self._match_text_seq("SQL", "DATA"): 2785 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2786 return None 2787 2788 def _parse_distkey(self) -> exp.DistKeyProperty: 2789 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2790 2791 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2792 table = self._parse_table(schema=True) 2793 2794 options = [] 2795 while self._match_texts(("INCLUDING", "EXCLUDING")): 2796 this = self._prev.text.upper() 2797 2798 id_var = self._parse_id_var() 2799 if not id_var: 2800 return None 2801 2802 options.append( 2803 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2804 ) 2805 2806 return self.expression(exp.LikeProperty, this=table, expressions=options) 2807 2808 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2809 return self.expression( 2810 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2811 ) 2812 2813 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2814 self._match(TokenType.EQ) 2815 return self.expression( 2816 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2817 ) 2818 2819 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2820 self._match_text_seq("WITH", "CONNECTION") 2821 return self.expression( 2822 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2823 ) 2824 2825 def _parse_returns(self) -> exp.ReturnsProperty: 2826 value: t.Optional[exp.Expression] 2827 null = None 2828 is_table = self._match(TokenType.TABLE) 2829 2830 if is_table: 2831 if self._match(TokenType.LT): 2832 value = self.expression( 2833 exp.Schema, 2834 this="TABLE", 2835 expressions=self._parse_csv(self._parse_struct_types), 2836 ) 2837 if not self._match(TokenType.GT): 2838 self.raise_error("Expecting >") 2839 else: 2840 value = self._parse_schema(exp.var("TABLE")) 2841 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2842 null = True 2843 value = None 2844 else: 2845 value = self._parse_types() 2846 2847 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2848 2849 def _parse_describe(self) -> exp.Describe: 2850 kind = self._match_set(self.CREATABLES) and self._prev.text 2851 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2852 if self._match(TokenType.DOT): 2853 style = None 2854 self._retreat(self._index - 2) 2855 2856 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2857 2858 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2859 this = self._parse_statement() 2860 else: 2861 this = self._parse_table(schema=True) 2862 2863 properties = self._parse_properties() 2864 expressions = properties.expressions if properties else None 2865 partition = self._parse_partition() 2866 return self.expression( 2867 exp.Describe, 2868 this=this, 2869 style=style, 2870 kind=kind, 2871 expressions=expressions, 2872 partition=partition, 2873 format=format, 2874 ) 2875 2876 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2877 kind = self._prev.text.upper() 2878 expressions = [] 2879 2880 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2881 if self._match(TokenType.WHEN): 2882 expression = self._parse_disjunction() 2883 self._match(TokenType.THEN) 2884 else: 2885 expression = None 2886 2887 else_ = self._match(TokenType.ELSE) 2888 2889 if not self._match(TokenType.INTO): 2890 return None 2891 2892 return self.expression( 2893 exp.ConditionalInsert, 2894 this=self.expression( 2895 exp.Insert, 2896 this=self._parse_table(schema=True), 2897 expression=self._parse_derived_table_values(), 2898 ), 2899 expression=expression, 2900 else_=else_, 2901 ) 2902 2903 expression = parse_conditional_insert() 2904 while expression is not None: 2905 expressions.append(expression) 2906 expression = parse_conditional_insert() 2907 2908 return self.expression( 2909 exp.MultitableInserts, 2910 kind=kind, 2911 comments=comments, 2912 expressions=expressions, 2913 source=self._parse_table(), 2914 ) 2915 2916 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2917 comments = [] 2918 hint = self._parse_hint() 2919 overwrite = self._match(TokenType.OVERWRITE) 2920 ignore = self._match(TokenType.IGNORE) 2921 local = self._match_text_seq("LOCAL") 2922 alternative = None 2923 is_function = None 2924 2925 if self._match_text_seq("DIRECTORY"): 2926 this: t.Optional[exp.Expression] = self.expression( 2927 exp.Directory, 2928 this=self._parse_var_or_string(), 2929 local=local, 2930 row_format=self._parse_row_format(match_row=True), 2931 ) 2932 else: 2933 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2934 comments += ensure_list(self._prev_comments) 2935 return self._parse_multitable_inserts(comments) 2936 2937 if self._match(TokenType.OR): 2938 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2939 2940 self._match(TokenType.INTO) 2941 comments += ensure_list(self._prev_comments) 2942 self._match(TokenType.TABLE) 2943 is_function = self._match(TokenType.FUNCTION) 2944 2945 this = ( 2946 self._parse_table(schema=True, parse_partition=True) 2947 if not is_function 2948 else self._parse_function() 2949 ) 2950 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2951 this.set("alias", self._parse_table_alias()) 2952 2953 returning = self._parse_returning() 2954 2955 return self.expression( 2956 
exp.Insert, 2957 comments=comments, 2958 hint=hint, 2959 is_function=is_function, 2960 this=this, 2961 stored=self._match_text_seq("STORED") and self._parse_stored(), 2962 by_name=self._match_text_seq("BY", "NAME"), 2963 exists=self._parse_exists(), 2964 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2965 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2966 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2967 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2968 conflict=self._parse_on_conflict(), 2969 returning=returning or self._parse_returning(), 2970 overwrite=overwrite, 2971 alternative=alternative, 2972 ignore=ignore, 2973 source=self._match(TokenType.TABLE) and self._parse_table(), 2974 ) 2975 2976 def _parse_kill(self) -> exp.Kill: 2977 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2978 2979 return self.expression( 2980 exp.Kill, 2981 this=self._parse_primary(), 2982 kind=kind, 2983 ) 2984 2985 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2986 conflict = self._match_text_seq("ON", "CONFLICT") 2987 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2988 2989 if not conflict and not duplicate: 2990 return None 2991 2992 conflict_keys = None 2993 constraint = None 2994 2995 if conflict: 2996 if self._match_text_seq("ON", "CONSTRAINT"): 2997 constraint = self._parse_id_var() 2998 elif self._match(TokenType.L_PAREN): 2999 conflict_keys = self._parse_csv(self._parse_id_var) 3000 self._match_r_paren() 3001 3002 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3003 if self._prev.token_type == TokenType.UPDATE: 3004 self._match(TokenType.SET) 3005 expressions = self._parse_csv(self._parse_equality) 3006 else: 3007 expressions = None 3008 3009 return self.expression( 3010 exp.OnConflict, 3011 duplicate=duplicate, 3012 expressions=expressions, 3013 action=action, 3014 conflict_keys=conflict_keys, 3015 constraint=constraint, 3016 where=self._parse_where(), 3017 ) 3018 3019 def _parse_returning(self) -> t.Optional[exp.Returning]: 3020 if not self._match(TokenType.RETURNING): 3021 return None 3022 return self.expression( 3023 exp.Returning, 3024 expressions=self._parse_csv(self._parse_expression), 3025 into=self._match(TokenType.INTO) and self._parse_table_part(), 3026 ) 3027 3028 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3029 if not self._match(TokenType.FORMAT): 3030 return None 3031 return self._parse_row_format() 3032 3033 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3034 index = self._index 3035 with_ = with_ or self._match_text_seq("WITH") 3036 3037 if not self._match(TokenType.SERDE_PROPERTIES): 3038 self._retreat(index) 3039 return None 3040 return self.expression( 3041 exp.SerdeProperties, 3042 **{ # type: ignore 3043 "expressions": self._parse_wrapped_properties(), 3044 "with": with_, 3045 }, 3046 ) 3047 3048 def _parse_row_format( 3049 self, match_row: bool = False 3050 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3051 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3052 return None 3053 3054 if self._match_text_seq("SERDE"): 3055 this = self._parse_string() 3056 3057 serde_properties = self._parse_serde_properties() 3058 3059 return self.expression( 3060 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3061 ) 3062 3063 self._match_text_seq("DELIMITED") 3064 3065 kwargs = {} 3066 3067 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3068 kwargs["fields"] = self._parse_string() 3069 if self._match_text_seq("ESCAPED", "BY"): 3070 kwargs["escaped"] = self._parse_string() 3071 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3072 kwargs["collection_items"] = self._parse_string() 3073 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3074 kwargs["map_keys"] = self._parse_string() 3075 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3076 kwargs["lines"] = self._parse_string() 3077 if self._match_text_seq("NULL", "DEFINED", "AS"): 3078 kwargs["null"] = self._parse_string() 3079 3080 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3081 3082 def _parse_load(self) -> exp.LoadData | exp.Command: 3083 if self._match_text_seq("DATA"): 3084 local = self._match_text_seq("LOCAL") 3085 self._match_text_seq("INPATH") 3086 inpath = self._parse_string() 3087 overwrite = self._match(TokenType.OVERWRITE) 3088 self._match_pair(TokenType.INTO, TokenType.TABLE) 3089 3090 return self.expression( 3091 exp.LoadData, 3092 this=self._parse_table(schema=True), 3093 local=local, 3094 overwrite=overwrite, 3095 inpath=inpath, 3096 partition=self._parse_partition(), 3097 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3098 serde=self._match_text_seq("SERDE") and self._parse_string(), 3099 ) 3100 return self._parse_as_command(self._prev) 3101 3102 def _parse_delete(self) -> exp.Delete: 3103 # This handles MySQL's "Multiple-Table Syntax" 3104 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3105 tables = None 3106 if not self._match(TokenType.FROM, advance=False): 3107 tables = self._parse_csv(self._parse_table) or None 3108 3109 returning = self._parse_returning() 3110 3111 return self.expression( 3112 exp.Delete, 3113 tables=tables, 3114 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3115 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3116 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3117 where=self._parse_where(), 3118 returning=returning or self._parse_returning(), 3119 limit=self._parse_limit(), 3120 ) 3121 3122 def _parse_update(self) -> exp.Update: 3123 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3124 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3125 returning = self._parse_returning() 3126 return self.expression( 3127 exp.Update, 3128 **{ # type: ignore 3129 "this": this, 3130 "expressions": expressions, 3131 "from": self._parse_from(joins=True), 3132 "where": self._parse_where(), 3133 "returning": returning or self._parse_returning(), 3134 "order": self._parse_order(), 3135 "limit": self._parse_limit(), 3136 }, 3137 ) 3138 3139 def _parse_use(self) -> exp.Use: 3140 return self.expression( 3141 exp.Use, 3142 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3143 this=self._parse_table(schema=False), 3144 ) 3145 3146 def _parse_uncache(self) -> exp.Uncache: 3147 if not self._match(TokenType.TABLE): 3148 self.raise_error("Expecting TABLE after UNCACHE") 3149 3150 return self.expression( 3151 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3152 ) 3153 3154 def _parse_cache(self) -> exp.Cache: 3155 lazy = self._match_text_seq("LAZY") 3156 self._match(TokenType.TABLE) 3157 table = 
self._parse_table(schema=True) 3158 3159 options = [] 3160 if self._match_text_seq("OPTIONS"): 3161 self._match_l_paren() 3162 k = self._parse_string() 3163 self._match(TokenType.EQ) 3164 v = self._parse_string() 3165 options = [k, v] 3166 self._match_r_paren() 3167 3168 self._match(TokenType.ALIAS) 3169 return self.expression( 3170 exp.Cache, 3171 this=table, 3172 lazy=lazy, 3173 options=options, 3174 expression=self._parse_select(nested=True), 3175 ) 3176 3177 def _parse_partition(self) -> t.Optional[exp.Partition]: 3178 if not self._match_texts(self.PARTITION_KEYWORDS): 3179 return None 3180 3181 return self.expression( 3182 exp.Partition, 3183 subpartition=self._prev.text.upper() == "SUBPARTITION", 3184 expressions=self._parse_wrapped_csv(self._parse_assignment), 3185 ) 3186 3187 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3188 def _parse_value_expression() -> t.Optional[exp.Expression]: 3189 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3190 return exp.var(self._prev.text.upper()) 3191 return self._parse_expression() 3192 3193 if self._match(TokenType.L_PAREN): 3194 expressions = self._parse_csv(_parse_value_expression) 3195 self._match_r_paren() 3196 return self.expression(exp.Tuple, expressions=expressions) 3197 3198 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3199 expression = self._parse_expression() 3200 if expression: 3201 return self.expression(exp.Tuple, expressions=[expression]) 3202 return None 3203 3204 def _parse_projections(self) -> t.List[exp.Expression]: 3205 return self._parse_expressions() 3206 3207 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3208 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3209 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3210 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3211 ) 3212 elif self._match(TokenType.FROM): 3213 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3214 # Support parentheses for duckdb FROM-first syntax 3215 select = self._parse_select(from_=from_) 3216 if select: 3217 if not select.args.get("from"): 3218 select.set("from", from_) 3219 this = select 3220 else: 3221 this = exp.select("*").from_(t.cast(exp.From, from_)) 3222 else: 3223 this = ( 3224 self._parse_table(consume_pipe=True) 3225 if table 3226 else self._parse_select(nested=True, parse_set_operation=False) 3227 ) 3228 3229 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3230 # in case a modifier (e.g. 
join) is following 3231 if table and isinstance(this, exp.Values) and this.alias: 3232 alias = this.args["alias"].pop() 3233 this = exp.Table(this=this, alias=alias) 3234 3235 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3236 3237 return this 3238 3239 def _parse_select( 3240 self, 3241 nested: bool = False, 3242 table: bool = False, 3243 parse_subquery_alias: bool = True, 3244 parse_set_operation: bool = True, 3245 consume_pipe: bool = True, 3246 from_: t.Optional[exp.From] = None, 3247 ) -> t.Optional[exp.Expression]: 3248 query = self._parse_select_query( 3249 nested=nested, 3250 table=table, 3251 parse_subquery_alias=parse_subquery_alias, 3252 parse_set_operation=parse_set_operation, 3253 ) 3254 3255 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3256 if not query and from_: 3257 query = exp.select("*").from_(from_) 3258 if isinstance(query, exp.Query): 3259 query = self._parse_pipe_syntax_query(query) 3260 query = query.subquery(copy=False) if query and table else query 3261 3262 return query 3263 3264 def _parse_select_query( 3265 self, 3266 nested: bool = False, 3267 table: bool = False, 3268 parse_subquery_alias: bool = True, 3269 parse_set_operation: bool = True, 3270 ) -> t.Optional[exp.Expression]: 3271 cte = self._parse_with() 3272 3273 if cte: 3274 this = self._parse_statement() 3275 3276 if not this: 3277 self.raise_error("Failed to parse any statement following CTE") 3278 return cte 3279 3280 if "with" in this.arg_types: 3281 this.set("with", cte) 3282 else: 3283 self.raise_error(f"{this.key} does not support CTE") 3284 this = cte 3285 3286 return this 3287 3288 # duckdb supports leading with FROM x 3289 from_ = ( 3290 self._parse_from(consume_pipe=True) 3291 if self._match(TokenType.FROM, advance=False) 3292 else None 3293 ) 3294 3295 if self._match(TokenType.SELECT): 3296 comments = self._prev_comments 3297 3298 hint = self._parse_hint() 3299 3300 if self._next and not self._next.token_type == TokenType.DOT: 3301 all_ = self._match(TokenType.ALL) 3302 distinct = self._match_set(self.DISTINCT_TOKENS) 3303 else: 3304 all_, distinct = None, None 3305 3306 kind = ( 3307 self._match(TokenType.ALIAS) 3308 and self._match_texts(("STRUCT", "VALUE")) 3309 and self._prev.text.upper() 3310 ) 3311 3312 if distinct: 3313 distinct = self.expression( 3314 exp.Distinct, 3315 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3316 ) 3317 3318 if all_ and distinct: 3319 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3320 3321 operation_modifiers = [] 3322 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3323 operation_modifiers.append(exp.var(self._prev.text.upper())) 3324 3325 limit = self._parse_limit(top=True) 3326 projections = self._parse_projections() 3327 3328 this = self.expression( 3329 exp.Select, 3330 kind=kind, 3331 hint=hint, 3332 distinct=distinct, 3333 expressions=projections, 3334 limit=limit, 3335 operation_modifiers=operation_modifiers or None, 3336 ) 3337 this.comments = comments 3338 3339 into = self._parse_into() 3340 if into: 3341 this.set("into", into) 3342 3343 if not from_: 3344 from_ = self._parse_from() 3345 3346 if from_: 3347 this.set("from", from_) 3348 3349 this = self._parse_query_modifiers(this) 3350 elif (table or nested) and self._match(TokenType.L_PAREN): 3351 this = self._parse_wrapped_select(table=table) 3352 3353 # We return early here so that the UNION isn't attached to the subquery by the 3354 # following call to _parse_set_operations, but 
instead becomes the parent node 3355 self._match_r_paren() 3356 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3357 elif self._match(TokenType.VALUES, advance=False): 3358 this = self._parse_derived_table_values() 3359 elif from_: 3360 this = exp.select("*").from_(from_.this, copy=False) 3361 elif self._match(TokenType.SUMMARIZE): 3362 table = self._match(TokenType.TABLE) 3363 this = self._parse_select() or self._parse_string() or self._parse_table() 3364 return self.expression(exp.Summarize, this=this, table=table) 3365 elif self._match(TokenType.DESCRIBE): 3366 this = self._parse_describe() 3367 elif self._match_text_seq("STREAM"): 3368 this = self._parse_function() 3369 if this: 3370 this = self.expression(exp.Stream, this=this) 3371 else: 3372 self._retreat(self._index - 1) 3373 else: 3374 this = None 3375 3376 return self._parse_set_operations(this) if parse_set_operation else this 3377 3378 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3379 self._match_text_seq("SEARCH") 3380 3381 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3382 3383 if not kind: 3384 return None 3385 3386 self._match_text_seq("FIRST", "BY") 3387 3388 return self.expression( 3389 exp.RecursiveWithSearch, 3390 kind=kind, 3391 this=self._parse_id_var(), 3392 expression=self._match_text_seq("SET") and self._parse_id_var(), 3393 using=self._match_text_seq("USING") and self._parse_id_var(), 3394 ) 3395 3396 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3397 if not skip_with_token and not self._match(TokenType.WITH): 3398 return None 3399 3400 comments = self._prev_comments 3401 recursive = self._match(TokenType.RECURSIVE) 3402 3403 last_comments = None 3404 expressions = [] 3405 while True: 3406 cte = self._parse_cte() 3407 if isinstance(cte, exp.CTE): 3408 expressions.append(cte) 3409 if last_comments: 3410 cte.add_comments(last_comments) 3411 3412 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3413 break 3414 else: 3415 self._match(TokenType.WITH) 3416 3417 last_comments = self._prev_comments 3418 3419 return self.expression( 3420 exp.With, 3421 comments=comments, 3422 expressions=expressions, 3423 recursive=recursive, 3424 search=self._parse_recursive_with_search(), 3425 ) 3426 3427 def _parse_cte(self) -> t.Optional[exp.CTE]: 3428 index = self._index 3429 3430 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3431 if not alias or not alias.this: 3432 self.raise_error("Expected CTE to have alias") 3433 3434 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3435 self._retreat(index) 3436 return None 3437 3438 comments = self._prev_comments 3439 3440 if self._match_text_seq("NOT", "MATERIALIZED"): 3441 materialized = False 3442 elif self._match_text_seq("MATERIALIZED"): 3443 materialized = True 3444 else: 3445 materialized = None 3446 3447 cte = self.expression( 3448 exp.CTE, 3449 this=self._parse_wrapped(self._parse_statement), 3450 alias=alias, 3451 materialized=materialized, 3452 comments=comments, 3453 ) 3454 3455 values = cte.this 3456 if isinstance(values, exp.Values): 3457 if values.alias: 3458 cte.set("this", exp.select("*").from_(values)) 3459 else: 3460 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3461 3462 return cte 3463 3464 def _parse_table_alias( 3465 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3466 ) -> t.Optional[exp.TableAlias]: 3467 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3468 # so this section tries to parse the clause version and if it fails, it treats the token 3469 # as an identifier (alias) 3470 if self._can_parse_limit_or_offset(): 3471 return None 3472 3473 any_token = self._match(TokenType.ALIAS) 3474 alias = ( 3475 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3476 or self._parse_string_as_identifier() 3477 ) 3478 3479 index = self._index 3480 if self._match(TokenType.L_PAREN): 3481 columns = self._parse_csv(self._parse_function_parameter) 3482 self._match_r_paren() if columns else self._retreat(index) 3483 else: 3484 columns = None 3485 3486 if not alias and not columns: 3487 return None 3488 3489 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3490 3491 # We bubble up comments from the Identifier to the TableAlias 3492 if isinstance(alias, exp.Identifier): 3493 table_alias.add_comments(alias.pop_comments()) 3494 3495 return table_alias 3496 3497 def _parse_subquery( 3498 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3499 ) -> t.Optional[exp.Subquery]: 3500 if not this: 3501 return None 3502 3503 return self.expression( 3504 exp.Subquery, 3505 this=this, 3506 pivots=self._parse_pivots(), 3507 alias=self._parse_table_alias() if parse_alias else None, 3508 sample=self._parse_table_sample(), 3509 ) 3510 3511 def _implicit_unnests_to_explicit(self, this: E) -> E: 3512 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3513 3514 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3515 for i, join in enumerate(this.args.get("joins") or []): 3516 table = join.this 3517 normalized_table = table.copy() 3518 normalized_table.meta["maybe_column"] = True 3519 normalized_table = _norm(normalized_table, dialect=self.dialect) 3520 3521 if isinstance(table, exp.Table) and not join.args.get("on"): 3522 if normalized_table.parts[0].name in refs: 3523 table_as_column = table.to_column() 3524 unnest = exp.Unnest(expressions=[table_as_column]) 3525 3526 # Table.to_column creates a parent Alias node that we want to convert to 3527 # a TableAlias and attach to the Unnest, so it matches the parser's output 3528 if isinstance(table.args.get("alias"), exp.TableAlias): 3529 table_as_column.replace(table_as_column.this) 3530 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3531 3532 table.replace(unnest) 3533 3534 refs.add(normalized_table.alias_or_name) 3535 3536 return this 3537 3538 def _parse_query_modifiers( 3539 self, this: t.Optional[exp.Expression] 3540 ) -> t.Optional[exp.Expression]: 3541 if isinstance(this, self.MODIFIABLES): 3542 for join in self._parse_joins(): 3543 this.append("joins", join) 3544 for lateral in iter(self._parse_lateral, None): 3545 this.append("laterals", lateral) 3546 3547 while True: 3548 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3549 modifier_token = self._curr 3550 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3551 key, expression = parser(self) 3552 3553 if expression: 3554 if this.args.get(key): 3555 self.raise_error( 3556 f"Found multiple '{modifier_token.text.upper()}' clauses", 3557 token=modifier_token, 3558 ) 3559 3560 this.set(key, expression) 3561 if key == "limit": 3562 offset = expression.args.pop("offset", None) 3563 3564 if offset: 3565 offset = exp.Offset(expression=offset) 3566 this.set("offset", offset) 3567 3568 limit_by_expressions = expression.expressions 3569 
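# For illustration: together with the `offset` pop a few lines above, the two statements below
# split a combined limit such as MySQL's `LIMIT <offset>, <count>` into separate Limit and Offset
# nodes on the query, and move any ClickHouse-style `LIMIT ... BY` expressions onto the Offset
# node. A minimal sketch of the observable effect, assuming the public sqlglot API:
#   >>> import sqlglot
#   >>> select = sqlglot.parse_one("SELECT a FROM t LIMIT 3, 10", read="mysql")
#   >>> bool(select.args.get("limit")) and bool(select.args.get("offset"))
#   True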
expression.set("expressions", None) 3570 offset.set("expressions", limit_by_expressions) 3571 continue 3572 break 3573 3574 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3575 this = self._implicit_unnests_to_explicit(this) 3576 3577 return this 3578 3579 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3580 start = self._curr 3581 while self._curr: 3582 self._advance() 3583 3584 end = self._tokens[self._index - 1] 3585 return exp.Hint(expressions=[self._find_sql(start, end)]) 3586 3587 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3588 return self._parse_function_call() 3589 3590 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3591 start_index = self._index 3592 should_fallback_to_string = False 3593 3594 hints = [] 3595 try: 3596 for hint in iter( 3597 lambda: self._parse_csv( 3598 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3599 ), 3600 [], 3601 ): 3602 hints.extend(hint) 3603 except ParseError: 3604 should_fallback_to_string = True 3605 3606 if should_fallback_to_string or self._curr: 3607 self._retreat(start_index) 3608 return self._parse_hint_fallback_to_string() 3609 3610 return self.expression(exp.Hint, expressions=hints) 3611 3612 def _parse_hint(self) -> t.Optional[exp.Hint]: 3613 if self._match(TokenType.HINT) and self._prev_comments: 3614 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3615 3616 return None 3617 3618 def _parse_into(self) -> t.Optional[exp.Into]: 3619 if not self._match(TokenType.INTO): 3620 return None 3621 3622 temp = self._match(TokenType.TEMPORARY) 3623 unlogged = self._match_text_seq("UNLOGGED") 3624 self._match(TokenType.TABLE) 3625 3626 return self.expression( 3627 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3628 ) 3629 3630 def _parse_from( 3631 self, 3632 joins: bool = False, 3633 skip_from_token: bool = False, 3634 consume_pipe: bool = False, 3635 ) -> t.Optional[exp.From]: 3636 if not skip_from_token and not self._match(TokenType.FROM): 3637 return None 3638 3639 return self.expression( 3640 exp.From, 3641 comments=self._prev_comments, 3642 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3643 ) 3644 3645 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3646 return self.expression( 3647 exp.MatchRecognizeMeasure, 3648 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3649 this=self._parse_expression(), 3650 ) 3651 3652 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3653 if not self._match(TokenType.MATCH_RECOGNIZE): 3654 return None 3655 3656 self._match_l_paren() 3657 3658 partition = self._parse_partition_by() 3659 order = self._parse_order() 3660 3661 measures = ( 3662 self._parse_csv(self._parse_match_recognize_measure) 3663 if self._match_text_seq("MEASURES") 3664 else None 3665 ) 3666 3667 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3668 rows = exp.var("ONE ROW PER MATCH") 3669 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3670 text = "ALL ROWS PER MATCH" 3671 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3672 text += " SHOW EMPTY MATCHES" 3673 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3674 text += " OMIT EMPTY MATCHES" 3675 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3676 text += " WITH UNMATCHED ROWS" 3677 rows = exp.var(text) 3678 else: 3679 rows = None 3680 3681 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3682 text = "AFTER 
MATCH SKIP" 3683 if self._match_text_seq("PAST", "LAST", "ROW"): 3684 text += " PAST LAST ROW" 3685 elif self._match_text_seq("TO", "NEXT", "ROW"): 3686 text += " TO NEXT ROW" 3687 elif self._match_text_seq("TO", "FIRST"): 3688 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3689 elif self._match_text_seq("TO", "LAST"): 3690 text += f" TO LAST {self._advance_any().text}" # type: ignore 3691 after = exp.var(text) 3692 else: 3693 after = None 3694 3695 if self._match_text_seq("PATTERN"): 3696 self._match_l_paren() 3697 3698 if not self._curr: 3699 self.raise_error("Expecting )", self._curr) 3700 3701 paren = 1 3702 start = self._curr 3703 3704 while self._curr and paren > 0: 3705 if self._curr.token_type == TokenType.L_PAREN: 3706 paren += 1 3707 if self._curr.token_type == TokenType.R_PAREN: 3708 paren -= 1 3709 3710 end = self._prev 3711 self._advance() 3712 3713 if paren > 0: 3714 self.raise_error("Expecting )", self._curr) 3715 3716 pattern = exp.var(self._find_sql(start, end)) 3717 else: 3718 pattern = None 3719 3720 define = ( 3721 self._parse_csv(self._parse_name_as_expression) 3722 if self._match_text_seq("DEFINE") 3723 else None 3724 ) 3725 3726 self._match_r_paren() 3727 3728 return self.expression( 3729 exp.MatchRecognize, 3730 partition_by=partition, 3731 order=order, 3732 measures=measures, 3733 rows=rows, 3734 after=after, 3735 pattern=pattern, 3736 define=define, 3737 alias=self._parse_table_alias(), 3738 ) 3739 3740 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3741 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3742 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3743 cross_apply = False 3744 3745 if cross_apply is not None: 3746 this = self._parse_select(table=True) 3747 view = None 3748 outer = None 3749 elif self._match(TokenType.LATERAL): 3750 this = self._parse_select(table=True) 3751 view = self._match(TokenType.VIEW) 3752 outer = self._match(TokenType.OUTER) 3753 else: 3754 return None 3755 3756 if not this: 3757 this = ( 3758 self._parse_unnest() 3759 or self._parse_function() 3760 or self._parse_id_var(any_token=False) 3761 ) 3762 3763 while self._match(TokenType.DOT): 3764 this = exp.Dot( 3765 this=this, 3766 expression=self._parse_function() or self._parse_id_var(any_token=False), 3767 ) 3768 3769 ordinality: t.Optional[bool] = None 3770 3771 if view: 3772 table = self._parse_id_var(any_token=False) 3773 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3774 table_alias: t.Optional[exp.TableAlias] = self.expression( 3775 exp.TableAlias, this=table, columns=columns 3776 ) 3777 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3778 # We move the alias from the lateral's child node to the lateral itself 3779 table_alias = this.args["alias"].pop() 3780 else: 3781 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3782 table_alias = self._parse_table_alias() 3783 3784 return self.expression( 3785 exp.Lateral, 3786 this=this, 3787 view=view, 3788 outer=outer, 3789 alias=table_alias, 3790 cross_apply=cross_apply, 3791 ordinality=ordinality, 3792 ) 3793 3794 def _parse_join_parts( 3795 self, 3796 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3797 return ( 3798 self._match_set(self.JOIN_METHODS) and self._prev, 3799 self._match_set(self.JOIN_SIDES) and self._prev, 3800 self._match_set(self.JOIN_KINDS) and self._prev, 3801 ) 3802 3803 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3804 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3805 this = self._parse_column() 3806 if isinstance(this, exp.Column): 3807 return this.this 3808 return this 3809 3810 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3811 3812 def _parse_join( 3813 self, skip_join_token: bool = False, parse_bracket: bool = False 3814 ) -> t.Optional[exp.Join]: 3815 if self._match(TokenType.COMMA): 3816 table = self._try_parse(self._parse_table) 3817 cross_join = self.expression(exp.Join, this=table) if table else None 3818 3819 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3820 cross_join.set("kind", "CROSS") 3821 3822 return cross_join 3823 3824 index = self._index 3825 method, side, kind = self._parse_join_parts() 3826 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3827 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3828 join_comments = self._prev_comments 3829 3830 if not skip_join_token and not join: 3831 self._retreat(index) 3832 kind = None 3833 method = None 3834 side = None 3835 3836 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3837 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3838 3839 if not skip_join_token and not join and not outer_apply and not cross_apply: 3840 return None 3841 3842 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3843 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3844 kwargs["expressions"] = self._parse_csv( 3845 lambda: self._parse_table(parse_bracket=parse_bracket) 3846 ) 3847 3848 if method: 3849 kwargs["method"] = method.text 3850 if side: 3851 kwargs["side"] = side.text 3852 if kind: 3853 kwargs["kind"] = kind.text 3854 if hint: 3855 kwargs["hint"] = hint 3856 3857 if self._match(TokenType.MATCH_CONDITION): 3858 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3859 3860 if self._match(TokenType.ON): 3861 kwargs["on"] = self._parse_assignment() 3862 elif self._match(TokenType.USING): 3863 kwargs["using"] = self._parse_using_identifiers() 3864 elif ( 3865 not method 3866 and not (outer_apply or cross_apply) 3867 and not isinstance(kwargs["this"], exp.Unnest) 3868 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3869 ): 3870 index = self._index 3871 joins: t.Optional[list] = list(self._parse_joins()) 3872 3873 if joins and self._match(TokenType.ON): 3874 kwargs["on"] = self._parse_assignment() 3875 elif joins and self._match(TokenType.USING): 3876 kwargs["using"] = self._parse_using_identifiers() 3877 else: 3878 joins = None 3879 self._retreat(index) 3880 3881 kwargs["this"].set("joins", joins if joins else None) 3882 3883 kwargs["pivots"] = self._parse_pivots() 3884 3885 comments = [c for token in (method, side, kind) if token for c in token.comments] 3886 comments = (join_comments or []) + comments 3887 3888 if ( 3889 self.ADD_JOIN_ON_TRUE 3890 and not kwargs.get("on") 3891 and not kwargs.get("using") 3892 and not kwargs.get("method") 3893 and kwargs.get("kind") in (None, "INNER", "OUTER") 3894 ): 3895 kwargs["on"] = exp.true() 3896 3897 return self.expression(exp.Join, comments=comments, **kwargs) 3898 3899 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3900 this = self._parse_assignment() 3901 3902 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3903 return this 3904 3905 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3906 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3907 3908 return this 3909 3910 def _parse_index_params(self) -> exp.IndexParameters: 3911 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3912 3913 if self._match(TokenType.L_PAREN, advance=False): 3914 columns = self._parse_wrapped_csv(self._parse_with_operator) 3915 else: 3916 columns = None 3917 3918 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3919 partition_by = self._parse_partition_by() 3920 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3921 tablespace = ( 3922 self._parse_var(any_token=True) 3923 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3924 else None 3925 ) 3926 where = self._parse_where() 3927 3928 on = self._parse_field() if self._match(TokenType.ON) else None 3929 3930 return self.expression( 3931 exp.IndexParameters, 3932 using=using, 3933 columns=columns, 3934 include=include, 3935 partition_by=partition_by, 3936 where=where, 3937 with_storage=with_storage, 3938 tablespace=tablespace, 3939 on=on, 3940 ) 3941 3942 def _parse_index( 3943 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3944 ) -> t.Optional[exp.Index]: 3945 if index or anonymous: 3946 unique = None 3947 primary = None 3948 amp = None 3949 3950 self._match(TokenType.ON) 3951 self._match(TokenType.TABLE) # hive 3952 table = self._parse_table_parts(schema=True) 3953 else: 3954 unique = self._match(TokenType.UNIQUE) 3955 primary = self._match_text_seq("PRIMARY") 3956 amp = self._match_text_seq("AMP") 3957 3958 if not self._match(TokenType.INDEX): 3959 return None 3960 3961 index = self._parse_id_var() 3962 table = None 3963 3964 params = self._parse_index_params() 3965 3966 return self.expression( 3967 exp.Index, 3968 this=index, 3969 table=table, 3970 unique=unique, 3971 primary=primary, 3972 amp=amp, 3973 params=params, 3974 ) 3975 3976 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3977 hints: t.List[exp.Expression] = [] 3978 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3979 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3980 hints.append( 3981 self.expression( 3982 exp.WithTableHint, 3983 expressions=self._parse_csv( 3984 lambda: self._parse_function() or self._parse_var(any_token=True) 3985 ), 3986 ) 3987 ) 3988 self._match_r_paren() 3989 else: 3990 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3991 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3992 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3993 3994 self._match_set((TokenType.INDEX, TokenType.KEY)) 3995 if self._match(TokenType.FOR): 3996 hint.set("target", self._advance_any() and self._prev.text.upper()) 3997 3998 hint.set("expressions", self._parse_wrapped_id_vars()) 3999 hints.append(hint) 4000 4001 return hints or None 4002 4003 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4004 return ( 4005 (not schema and self._parse_function(optional_parens=False)) 4006 or self._parse_id_var(any_token=False) 4007 or self._parse_string_as_identifier() 4008 or self._parse_placeholder() 4009 ) 4010 4011 def _parse_table_parts( 4012 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4013 ) -> exp.Table: 4014 catalog = None 4015 db = None 4016 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4017 4018 while self._match(TokenType.DOT): 4019 
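# For illustration: each DOT consumed by this loop shifts the previously parsed parts left, so
# `a.b.c` becomes Table(catalog=a, db=b, this=c), and any parts beyond three are folded into
# nested exp.Dot expressions. A minimal sketch, assuming the public sqlglot API:
#   >>> import sqlglot
#   >>> tbl = sqlglot.parse_one("SELECT * FROM a.b.c").find(sqlglot.exp.Table)
#   >>> (tbl.catalog, tbl.db, tbl.name)
#   ('a', 'b', 'c')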
if catalog: 4020 # This allows nesting the table in arbitrarily many dot expressions if needed 4021 table = self.expression( 4022 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4023 ) 4024 else: 4025 catalog = db 4026 db = table 4027 # "" used for tsql FROM a..b case 4028 table = self._parse_table_part(schema=schema) or "" 4029 4030 if ( 4031 wildcard 4032 and self._is_connected() 4033 and (isinstance(table, exp.Identifier) or not table) 4034 and self._match(TokenType.STAR) 4035 ): 4036 if isinstance(table, exp.Identifier): 4037 table.args["this"] += "*" 4038 else: 4039 table = exp.Identifier(this="*") 4040 4041 # We bubble up comments from the Identifier to the Table 4042 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4043 4044 if is_db_reference: 4045 catalog = db 4046 db = table 4047 table = None 4048 4049 if not table and not is_db_reference: 4050 self.raise_error(f"Expected table name but got {self._curr}") 4051 if not db and is_db_reference: 4052 self.raise_error(f"Expected database name but got {self._curr}") 4053 4054 table = self.expression( 4055 exp.Table, 4056 comments=comments, 4057 this=table, 4058 db=db, 4059 catalog=catalog, 4060 ) 4061 4062 changes = self._parse_changes() 4063 if changes: 4064 table.set("changes", changes) 4065 4066 at_before = self._parse_historical_data() 4067 if at_before: 4068 table.set("when", at_before) 4069 4070 pivots = self._parse_pivots() 4071 if pivots: 4072 table.set("pivots", pivots) 4073 4074 return table 4075 4076 def _parse_table( 4077 self, 4078 schema: bool = False, 4079 joins: bool = False, 4080 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4081 parse_bracket: bool = False, 4082 is_db_reference: bool = False, 4083 parse_partition: bool = False, 4084 consume_pipe: bool = False, 4085 ) -> t.Optional[exp.Expression]: 4086 lateral = self._parse_lateral() 4087 if lateral: 4088 return lateral 4089 4090 unnest = self._parse_unnest() 4091 if unnest: 4092 return unnest 4093 4094 values = self._parse_derived_table_values() 4095 if values: 4096 return values 4097 4098 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4099 if subquery: 4100 if not subquery.args.get("pivots"): 4101 subquery.set("pivots", self._parse_pivots()) 4102 return subquery 4103 4104 bracket = parse_bracket and self._parse_bracket(None) 4105 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4106 4107 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4108 self._parse_table 4109 ) 4110 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4111 4112 only = self._match(TokenType.ONLY) 4113 4114 this = t.cast( 4115 exp.Expression, 4116 bracket 4117 or rows_from 4118 or self._parse_bracket( 4119 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4120 ), 4121 ) 4122 4123 if only: 4124 this.set("only", only) 4125 4126 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4127 self._match_text_seq("*") 4128 4129 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4130 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4131 this.set("partition", self._parse_partition()) 4132 4133 if schema: 4134 return self._parse_schema(this=this) 4135 4136 version = self._parse_version() 4137 4138 if version: 4139 this.set("version", version) 4140 4141 if self.dialect.ALIAS_POST_TABLESAMPLE: 4142 this.set("sample", self._parse_table_sample()) 4143 
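# For illustration: dialects with ALIAS_POST_TABLESAMPLE (Hive-style ones, for instance) place the
# sample clause before the alias, as in `t TABLESAMPLE (1 PERCENT) alias`, so the sample is
# consumed here; for the remaining dialects it is parsed after the alias further below.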
4144 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4145 if alias: 4146 this.set("alias", alias) 4147 4148 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4149 return self.expression( 4150 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4151 ) 4152 4153 this.set("hints", self._parse_table_hints()) 4154 4155 if not this.args.get("pivots"): 4156 this.set("pivots", self._parse_pivots()) 4157 4158 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4159 this.set("sample", self._parse_table_sample()) 4160 4161 if joins: 4162 for join in self._parse_joins(): 4163 this.append("joins", join) 4164 4165 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4166 this.set("ordinality", True) 4167 this.set("alias", self._parse_table_alias()) 4168 4169 return this 4170 4171 def _parse_version(self) -> t.Optional[exp.Version]: 4172 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4173 this = "TIMESTAMP" 4174 elif self._match(TokenType.VERSION_SNAPSHOT): 4175 this = "VERSION" 4176 else: 4177 return None 4178 4179 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4180 kind = self._prev.text.upper() 4181 start = self._parse_bitwise() 4182 self._match_texts(("TO", "AND")) 4183 end = self._parse_bitwise() 4184 expression: t.Optional[exp.Expression] = self.expression( 4185 exp.Tuple, expressions=[start, end] 4186 ) 4187 elif self._match_text_seq("CONTAINED", "IN"): 4188 kind = "CONTAINED IN" 4189 expression = self.expression( 4190 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4191 ) 4192 elif self._match(TokenType.ALL): 4193 kind = "ALL" 4194 expression = None 4195 else: 4196 self._match_text_seq("AS", "OF") 4197 kind = "AS OF" 4198 expression = self._parse_type() 4199 4200 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4201 4202 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4203 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4204 index = self._index 4205 historical_data = None 4206 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4207 this = self._prev.text.upper() 4208 kind = ( 4209 self._match(TokenType.L_PAREN) 4210 and self._match_texts(self.HISTORICAL_DATA_KIND) 4211 and self._prev.text.upper() 4212 ) 4213 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4214 4215 if expression: 4216 self._match_r_paren() 4217 historical_data = self.expression( 4218 exp.HistoricalData, this=this, kind=kind, expression=expression 4219 ) 4220 else: 4221 self._retreat(index) 4222 4223 return historical_data 4224 4225 def _parse_changes(self) -> t.Optional[exp.Changes]: 4226 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4227 return None 4228 4229 information = self._parse_var(any_token=True) 4230 self._match_r_paren() 4231 4232 return self.expression( 4233 exp.Changes, 4234 information=information, 4235 at_before=self._parse_historical_data(), 4236 end=self._parse_historical_data(), 4237 ) 4238 4239 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4240 if not self._match(TokenType.UNNEST): 4241 return None 4242 4243 expressions = self._parse_wrapped_csv(self._parse_equality) 4244 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4245 4246 alias = self._parse_table_alias() if with_alias else None 4247 4248 if alias: 4249 if self.dialect.UNNEST_COLUMN_ONLY: 4250 if alias.args.get("columns"): 4251 self.raise_error("Unexpected extra column alias in unnest.") 4252 4253 
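# For illustration: in UNNEST_COLUMN_ONLY dialects (BigQuery, for instance) an alias on UNNEST
# names the produced column rather than the derived table, so the table alias parsed above is
# reinterpreted as a column alias by the two statements below. A minimal sketch, assuming the
# public sqlglot API:
#   >>> import sqlglot
#   >>> sql = "SELECT x FROM UNNEST([1, 2, 3]) AS x"
#   >>> unnest = sqlglot.parse_one(sql, read="bigquery").find(sqlglot.exp.Unnest)
#   >>> [col.name for col in unnest.args["alias"].columns]
#   ['x']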
alias.set("columns", [alias.this]) 4254 alias.set("this", None) 4255 4256 columns = alias.args.get("columns") or [] 4257 if offset and len(expressions) < len(columns): 4258 offset = columns.pop() 4259 4260 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4261 self._match(TokenType.ALIAS) 4262 offset = self._parse_id_var( 4263 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4264 ) or exp.to_identifier("offset") 4265 4266 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4267 4268 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4269 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4270 if not is_derived and not ( 4271 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4272 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4273 ): 4274 return None 4275 4276 expressions = self._parse_csv(self._parse_value) 4277 alias = self._parse_table_alias() 4278 4279 if is_derived: 4280 self._match_r_paren() 4281 4282 return self.expression( 4283 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4284 ) 4285 4286 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4287 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4288 as_modifier and self._match_text_seq("USING", "SAMPLE") 4289 ): 4290 return None 4291 4292 bucket_numerator = None 4293 bucket_denominator = None 4294 bucket_field = None 4295 percent = None 4296 size = None 4297 seed = None 4298 4299 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4300 matched_l_paren = self._match(TokenType.L_PAREN) 4301 4302 if self.TABLESAMPLE_CSV: 4303 num = None 4304 expressions = self._parse_csv(self._parse_primary) 4305 else: 4306 expressions = None 4307 num = ( 4308 self._parse_factor() 4309 if self._match(TokenType.NUMBER, advance=False) 4310 else self._parse_primary() or self._parse_placeholder() 4311 ) 4312 4313 if self._match_text_seq("BUCKET"): 4314 bucket_numerator = self._parse_number() 4315 self._match_text_seq("OUT", "OF") 4316 bucket_denominator = bucket_denominator = self._parse_number() 4317 self._match(TokenType.ON) 4318 bucket_field = self._parse_field() 4319 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4320 percent = num 4321 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4322 size = num 4323 else: 4324 percent = num 4325 4326 if matched_l_paren: 4327 self._match_r_paren() 4328 4329 if self._match(TokenType.L_PAREN): 4330 method = self._parse_var(upper=True) 4331 seed = self._match(TokenType.COMMA) and self._parse_number() 4332 self._match_r_paren() 4333 elif self._match_texts(("SEED", "REPEATABLE")): 4334 seed = self._parse_wrapped(self._parse_number) 4335 4336 if not method and self.DEFAULT_SAMPLING_METHOD: 4337 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4338 4339 return self.expression( 4340 exp.TableSample, 4341 expressions=expressions, 4342 method=method, 4343 bucket_numerator=bucket_numerator, 4344 bucket_denominator=bucket_denominator, 4345 bucket_field=bucket_field, 4346 percent=percent, 4347 size=size, 4348 seed=seed, 4349 ) 4350 4351 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4352 return list(iter(self._parse_pivot, None)) or None 4353 4354 def _parse_joins(self) -> t.Iterator[exp.Join]: 4355 return iter(self._parse_join, None) 4356 4357 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4358 if not self._match(TokenType.INTO): 4359 return None 
4360 4361 return self.expression( 4362 exp.UnpivotColumns, 4363 this=self._match_text_seq("NAME") and self._parse_column(), 4364 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4365 ) 4366 4367 # https://duckdb.org/docs/sql/statements/pivot 4368 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4369 def _parse_on() -> t.Optional[exp.Expression]: 4370 this = self._parse_bitwise() 4371 4372 if self._match(TokenType.IN): 4373 # PIVOT ... ON col IN (row_val1, row_val2) 4374 return self._parse_in(this) 4375 if self._match(TokenType.ALIAS, advance=False): 4376 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4377 return self._parse_alias(this) 4378 4379 return this 4380 4381 this = self._parse_table() 4382 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4383 into = self._parse_unpivot_columns() 4384 using = self._match(TokenType.USING) and self._parse_csv( 4385 lambda: self._parse_alias(self._parse_function()) 4386 ) 4387 group = self._parse_group() 4388 4389 return self.expression( 4390 exp.Pivot, 4391 this=this, 4392 expressions=expressions, 4393 using=using, 4394 group=group, 4395 unpivot=is_unpivot, 4396 into=into, 4397 ) 4398 4399 def _parse_pivot_in(self) -> exp.In: 4400 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4401 this = self._parse_select_or_expression() 4402 4403 self._match(TokenType.ALIAS) 4404 alias = self._parse_bitwise() 4405 if alias: 4406 if isinstance(alias, exp.Column) and not alias.db: 4407 alias = alias.this 4408 return self.expression(exp.PivotAlias, this=this, alias=alias) 4409 4410 return this 4411 4412 value = self._parse_column() 4413 4414 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4415 self.raise_error("Expecting IN (") 4416 4417 if self._match(TokenType.ANY): 4418 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4419 else: 4420 exprs = self._parse_csv(_parse_aliased_expression) 4421 4422 self._match_r_paren() 4423 return self.expression(exp.In, this=value, expressions=exprs) 4424 4425 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4426 func = self._parse_function() 4427 if not func: 4428 if self._prev and self._prev.token_type == TokenType.COMMA: 4429 return None 4430 self.raise_error("Expecting an aggregation function in PIVOT") 4431 4432 return self._parse_alias(func) 4433 4434 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4435 index = self._index 4436 include_nulls = None 4437 4438 if self._match(TokenType.PIVOT): 4439 unpivot = False 4440 elif self._match(TokenType.UNPIVOT): 4441 unpivot = True 4442 4443 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4444 if self._match_text_seq("INCLUDE", "NULLS"): 4445 include_nulls = True 4446 elif self._match_text_seq("EXCLUDE", "NULLS"): 4447 include_nulls = False 4448 else: 4449 return None 4450 4451 expressions = [] 4452 4453 if not self._match(TokenType.L_PAREN): 4454 self._retreat(index) 4455 return None 4456 4457 if unpivot: 4458 expressions = self._parse_csv(self._parse_column) 4459 else: 4460 expressions = self._parse_csv(self._parse_pivot_aggregation) 4461 4462 if not expressions: 4463 self.raise_error("Failed to parse PIVOT's aggregation list") 4464 4465 if not self._match(TokenType.FOR): 4466 self.raise_error("Expecting FOR") 4467 4468 fields = [] 4469 while True: 4470 field = self._try_parse(self._parse_pivot_in) 4471 if not field: 4472 break 4473 fields.append(field) 4474 4475 
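# For illustration: more than one `FOR <col> IN (...)` group can be collected by the loop above
# (e.g. DuckDB's `FOR year IN (2000, 2010) FOR country IN ('NL', 'US')`), and the optional
# `DEFAULT ON NULL (<expr>)` clause handled next is the form seen in Snowflake, for instance.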
default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4476 self._parse_bitwise 4477 ) 4478 4479 group = self._parse_group() 4480 4481 self._match_r_paren() 4482 4483 pivot = self.expression( 4484 exp.Pivot, 4485 expressions=expressions, 4486 fields=fields, 4487 unpivot=unpivot, 4488 include_nulls=include_nulls, 4489 default_on_null=default_on_null, 4490 group=group, 4491 ) 4492 4493 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4494 pivot.set("alias", self._parse_table_alias()) 4495 4496 if not unpivot: 4497 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4498 4499 columns: t.List[exp.Expression] = [] 4500 all_fields = [] 4501 for pivot_field in pivot.fields: 4502 pivot_field_expressions = pivot_field.expressions 4503 4504 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4505 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4506 continue 4507 4508 all_fields.append( 4509 [ 4510 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4511 for fld in pivot_field_expressions 4512 ] 4513 ) 4514 4515 if all_fields: 4516 if names: 4517 all_fields.append(names) 4518 4519 # Generate all possible combinations of the pivot columns 4520 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4521 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4522 for fld_parts_tuple in itertools.product(*all_fields): 4523 fld_parts = list(fld_parts_tuple) 4524 4525 if names and self.PREFIXED_PIVOT_COLUMNS: 4526 # Move the "name" to the front of the list 4527 fld_parts.insert(0, fld_parts.pop(-1)) 4528 4529 columns.append(exp.to_identifier("_".join(fld_parts))) 4530 4531 pivot.set("columns", columns) 4532 4533 return pivot 4534 4535 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4536 return [agg.alias for agg in aggregations if agg.alias] 4537 4538 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4539 if not skip_where_token and not self._match(TokenType.PREWHERE): 4540 return None 4541 4542 return self.expression( 4543 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4544 ) 4545 4546 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4547 if not skip_where_token and not self._match(TokenType.WHERE): 4548 return None 4549 4550 return self.expression( 4551 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4552 ) 4553 4554 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4555 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4556 return None 4557 comments = self._prev_comments 4558 4559 elements: t.Dict[str, t.Any] = defaultdict(list) 4560 4561 if self._match(TokenType.ALL): 4562 elements["all"] = True 4563 elif self._match(TokenType.DISTINCT): 4564 elements["all"] = False 4565 4566 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4567 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4568 4569 while True: 4570 index = self._index 4571 4572 elements["expressions"].extend( 4573 self._parse_csv( 4574 lambda: None 4575 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4576 else self._parse_assignment() 4577 ) 4578 ) 4579 4580 before_with_index = self._index 4581 with_prefix = self._match(TokenType.WITH) 4582 4583 if 
self._match(TokenType.ROLLUP): 4584 elements["rollup"].append( 4585 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4586 ) 4587 elif self._match(TokenType.CUBE): 4588 elements["cube"].append( 4589 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4590 ) 4591 elif self._match(TokenType.GROUPING_SETS): 4592 elements["grouping_sets"].append( 4593 self.expression( 4594 exp.GroupingSets, 4595 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4596 ) 4597 ) 4598 elif self._match_text_seq("TOTALS"): 4599 elements["totals"] = True # type: ignore 4600 4601 if before_with_index <= self._index <= before_with_index + 1: 4602 self._retreat(before_with_index) 4603 break 4604 4605 if index == self._index: 4606 break 4607 4608 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4609 4610 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4611 return self.expression( 4612 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4613 ) 4614 4615 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4616 if self._match(TokenType.L_PAREN): 4617 grouping_set = self._parse_csv(self._parse_column) 4618 self._match_r_paren() 4619 return self.expression(exp.Tuple, expressions=grouping_set) 4620 4621 return self._parse_column() 4622 4623 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4624 if not skip_having_token and not self._match(TokenType.HAVING): 4625 return None 4626 return self.expression( 4627 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4628 ) 4629 4630 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4631 if not self._match(TokenType.QUALIFY): 4632 return None 4633 return self.expression(exp.Qualify, this=self._parse_assignment()) 4634 4635 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4636 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4637 exp.Prior, this=self._parse_bitwise() 4638 ) 4639 connect = self._parse_assignment() 4640 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4641 return connect 4642 4643 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4644 if skip_start_token: 4645 start = None 4646 elif self._match(TokenType.START_WITH): 4647 start = self._parse_assignment() 4648 else: 4649 return None 4650 4651 self._match(TokenType.CONNECT_BY) 4652 nocycle = self._match_text_seq("NOCYCLE") 4653 connect = self._parse_connect_with_prior() 4654 4655 if not start and self._match(TokenType.START_WITH): 4656 start = self._parse_assignment() 4657 4658 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4659 4660 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4661 this = self._parse_id_var(any_token=True) 4662 if self._match(TokenType.ALIAS): 4663 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4664 return this 4665 4666 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4667 if self._match_text_seq("INTERPOLATE"): 4668 return self._parse_wrapped_csv(self._parse_name_as_expression) 4669 return None 4670 4671 def _parse_order( 4672 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4673 ) -> t.Optional[exp.Expression]: 4674 siblings = None 4675 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4676 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4677 return this 4678 4679 siblings = True 4680 
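# For illustration: `ORDER SIBLINGS BY` is Oracle's hierarchical-query variant of ORDER BY
# (used together with CONNECT BY); setting the `siblings` flag lets the generator round-trip it,
# e.g. `SELECT name FROM emp CONNECT BY PRIOR id = parent_id ORDER SIBLINGS BY name`.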
4681 return self.expression( 4682 exp.Order, 4683 comments=self._prev_comments, 4684 this=this, 4685 expressions=self._parse_csv(self._parse_ordered), 4686 siblings=siblings, 4687 ) 4688 4689 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4690 if not self._match(token): 4691 return None 4692 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4693 4694 def _parse_ordered( 4695 self, parse_method: t.Optional[t.Callable] = None 4696 ) -> t.Optional[exp.Ordered]: 4697 this = parse_method() if parse_method else self._parse_assignment() 4698 if not this: 4699 return None 4700 4701 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4702 this = exp.var("ALL") 4703 4704 asc = self._match(TokenType.ASC) 4705 desc = self._match(TokenType.DESC) or (asc and False) 4706 4707 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4708 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4709 4710 nulls_first = is_nulls_first or False 4711 explicitly_null_ordered = is_nulls_first or is_nulls_last 4712 4713 if ( 4714 not explicitly_null_ordered 4715 and ( 4716 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4717 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4718 ) 4719 and self.dialect.NULL_ORDERING != "nulls_are_last" 4720 ): 4721 nulls_first = True 4722 4723 if self._match_text_seq("WITH", "FILL"): 4724 with_fill = self.expression( 4725 exp.WithFill, 4726 **{ # type: ignore 4727 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4728 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4729 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4730 "interpolate": self._parse_interpolate(), 4731 }, 4732 ) 4733 else: 4734 with_fill = None 4735 4736 return self.expression( 4737 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4738 ) 4739 4740 def _parse_limit_options(self) -> exp.LimitOptions: 4741 percent = self._match(TokenType.PERCENT) 4742 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4743 self._match_text_seq("ONLY") 4744 with_ties = self._match_text_seq("WITH", "TIES") 4745 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4746 4747 def _parse_limit( 4748 self, 4749 this: t.Optional[exp.Expression] = None, 4750 top: bool = False, 4751 skip_limit_token: bool = False, 4752 ) -> t.Optional[exp.Expression]: 4753 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4754 comments = self._prev_comments 4755 if top: 4756 limit_paren = self._match(TokenType.L_PAREN) 4757 expression = self._parse_term() if limit_paren else self._parse_number() 4758 4759 if limit_paren: 4760 self._match_r_paren() 4761 4762 limit_options = self._parse_limit_options() 4763 else: 4764 limit_options = None 4765 expression = self._parse_term() 4766 4767 if self._match(TokenType.COMMA): 4768 offset = expression 4769 expression = self._parse_term() 4770 else: 4771 offset = None 4772 4773 limit_exp = self.expression( 4774 exp.Limit, 4775 this=this, 4776 expression=expression, 4777 offset=offset, 4778 comments=comments, 4779 limit_options=limit_options, 4780 expressions=self._parse_limit_by(), 4781 ) 4782 4783 return limit_exp 4784 4785 if self._match(TokenType.FETCH): 4786 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4787 direction = self._prev.text.upper() if direction else "FIRST" 4788 4789 count = self._parse_field(tokens=self.FETCH_TOKENS) 4790 4791 return 
self.expression( 4792 exp.Fetch, 4793 direction=direction, 4794 count=count, 4795 limit_options=self._parse_limit_options(), 4796 ) 4797 4798 return this 4799 4800 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4801 if not self._match(TokenType.OFFSET): 4802 return this 4803 4804 count = self._parse_term() 4805 self._match_set((TokenType.ROW, TokenType.ROWS)) 4806 4807 return self.expression( 4808 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4809 ) 4810 4811 def _can_parse_limit_or_offset(self) -> bool: 4812 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4813 return False 4814 4815 index = self._index 4816 result = bool( 4817 self._try_parse(self._parse_limit, retreat=True) 4818 or self._try_parse(self._parse_offset, retreat=True) 4819 ) 4820 self._retreat(index) 4821 return result 4822 4823 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4824 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4825 4826 def _parse_locks(self) -> t.List[exp.Lock]: 4827 locks = [] 4828 while True: 4829 update, key = None, None 4830 if self._match_text_seq("FOR", "UPDATE"): 4831 update = True 4832 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4833 "LOCK", "IN", "SHARE", "MODE" 4834 ): 4835 update = False 4836 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4837 update, key = False, True 4838 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4839 update, key = True, True 4840 else: 4841 break 4842 4843 expressions = None 4844 if self._match_text_seq("OF"): 4845 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4846 4847 wait: t.Optional[bool | exp.Expression] = None 4848 if self._match_text_seq("NOWAIT"): 4849 wait = True 4850 elif self._match_text_seq("WAIT"): 4851 wait = self._parse_primary() 4852 elif self._match_text_seq("SKIP", "LOCKED"): 4853 wait = False 4854 4855 locks.append( 4856 self.expression( 4857 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4858 ) 4859 ) 4860 4861 return locks 4862 4863 def parse_set_operation( 4864 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4865 ) -> t.Optional[exp.Expression]: 4866 start = self._index 4867 _, side_token, kind_token = self._parse_join_parts() 4868 4869 side = side_token.text if side_token else None 4870 kind = kind_token.text if kind_token else None 4871 4872 if not self._match_set(self.SET_OPERATIONS): 4873 self._retreat(start) 4874 return None 4875 4876 token_type = self._prev.token_type 4877 4878 if token_type == TokenType.UNION: 4879 operation: t.Type[exp.SetOperation] = exp.Union 4880 elif token_type == TokenType.EXCEPT: 4881 operation = exp.Except 4882 else: 4883 operation = exp.Intersect 4884 4885 comments = self._prev.comments 4886 4887 if self._match(TokenType.DISTINCT): 4888 distinct: t.Optional[bool] = True 4889 elif self._match(TokenType.ALL): 4890 distinct = False 4891 else: 4892 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4893 if distinct is None: 4894 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4895 4896 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4897 "STRICT", "CORRESPONDING" 4898 ) 4899 if self._match_text_seq("CORRESPONDING"): 4900 by_name = True 4901 if not side and not kind: 4902 kind = "INNER" 4903 4904 on_column_list = None 4905 if by_name and self._match_texts(("ON", "BY")): 4906 on_column_list = 
self._parse_wrapped_csv(self._parse_column) 4907 4908 expression = self._parse_select( 4909 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4910 ) 4911 4912 return self.expression( 4913 operation, 4914 comments=comments, 4915 this=this, 4916 distinct=distinct, 4917 by_name=by_name, 4918 expression=expression, 4919 side=side, 4920 kind=kind, 4921 on=on_column_list, 4922 ) 4923 4924 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4925 while this: 4926 setop = self.parse_set_operation(this) 4927 if not setop: 4928 break 4929 this = setop 4930 4931 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4932 expression = this.expression 4933 4934 if expression: 4935 for arg in self.SET_OP_MODIFIERS: 4936 expr = expression.args.get(arg) 4937 if expr: 4938 this.set(arg, expr.pop()) 4939 4940 return this 4941 4942 def _parse_expression(self) -> t.Optional[exp.Expression]: 4943 return self._parse_alias(self._parse_assignment()) 4944 4945 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4946 this = self._parse_disjunction() 4947 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4948 # This allows us to parse <non-identifier token> := <expr> 4949 this = exp.column( 4950 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4951 ) 4952 4953 while self._match_set(self.ASSIGNMENT): 4954 if isinstance(this, exp.Column) and len(this.parts) == 1: 4955 this = this.this 4956 4957 this = self.expression( 4958 self.ASSIGNMENT[self._prev.token_type], 4959 this=this, 4960 comments=self._prev_comments, 4961 expression=self._parse_assignment(), 4962 ) 4963 4964 return this 4965 4966 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4967 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4968 4969 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4970 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4971 4972 def _parse_equality(self) -> t.Optional[exp.Expression]: 4973 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4974 4975 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4976 return self._parse_tokens(self._parse_range, self.COMPARISON) 4977 4978 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4979 this = this or self._parse_bitwise() 4980 negate = self._match(TokenType.NOT) 4981 4982 if self._match_set(self.RANGE_PARSERS): 4983 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4984 if not expression: 4985 return this 4986 4987 this = expression 4988 elif self._match(TokenType.ISNULL): 4989 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4990 4991 # Postgres supports ISNULL and NOTNULL for conditions. 
4992 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4993 if self._match(TokenType.NOTNULL): 4994 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4995 this = self.expression(exp.Not, this=this) 4996 4997 if negate: 4998 this = self._negate_range(this) 4999 5000 if self._match(TokenType.IS): 5001 this = self._parse_is(this) 5002 5003 return this 5004 5005 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5006 if not this: 5007 return this 5008 5009 return self.expression(exp.Not, this=this) 5010 5011 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5012 index = self._index - 1 5013 negate = self._match(TokenType.NOT) 5014 5015 if self._match_text_seq("DISTINCT", "FROM"): 5016 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5017 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5018 5019 if self._match(TokenType.JSON): 5020 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5021 5022 if self._match_text_seq("WITH"): 5023 _with = True 5024 elif self._match_text_seq("WITHOUT"): 5025 _with = False 5026 else: 5027 _with = None 5028 5029 unique = self._match(TokenType.UNIQUE) 5030 self._match_text_seq("KEYS") 5031 expression: t.Optional[exp.Expression] = self.expression( 5032 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5033 ) 5034 else: 5035 expression = self._parse_primary() or self._parse_null() 5036 if not expression: 5037 self._retreat(index) 5038 return None 5039 5040 this = self.expression(exp.Is, this=this, expression=expression) 5041 return self.expression(exp.Not, this=this) if negate else this 5042 5043 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5044 unnest = self._parse_unnest(with_alias=False) 5045 if unnest: 5046 this = self.expression(exp.In, this=this, unnest=unnest) 5047 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5048 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5049 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5050 5051 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5052 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5053 else: 5054 this = self.expression(exp.In, this=this, expressions=expressions) 5055 5056 if matched_l_paren: 5057 self._match_r_paren(this) 5058 elif not self._match(TokenType.R_BRACKET, expression=this): 5059 self.raise_error("Expecting ]") 5060 else: 5061 this = self.expression(exp.In, this=this, field=self._parse_column()) 5062 5063 return this 5064 5065 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5066 symmetric = None 5067 if self._match_text_seq("SYMMETRIC"): 5068 symmetric = True 5069 elif self._match_text_seq("ASYMMETRIC"): 5070 symmetric = False 5071 5072 low = self._parse_bitwise() 5073 self._match(TokenType.AND) 5074 high = self._parse_bitwise() 5075 5076 return self.expression( 5077 exp.Between, 5078 this=this, 5079 low=low, 5080 high=high, 5081 symmetric=symmetric, 5082 ) 5083 5084 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5085 if not self._match(TokenType.ESCAPE): 5086 return this 5087 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5088 5089 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5090 index = self._index 5091 5092 if not 
self._match(TokenType.INTERVAL) and match_interval: 5093 return None 5094 5095 if self._match(TokenType.STRING, advance=False): 5096 this = self._parse_primary() 5097 else: 5098 this = self._parse_term() 5099 5100 if not this or ( 5101 isinstance(this, exp.Column) 5102 and not this.table 5103 and not this.this.quoted 5104 and this.name.upper() == "IS" 5105 ): 5106 self._retreat(index) 5107 return None 5108 5109 unit = self._parse_function() or ( 5110 not self._match(TokenType.ALIAS, advance=False) 5111 and self._parse_var(any_token=True, upper=True) 5112 ) 5113 5114 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5115 # each INTERVAL expression into this canonical form so it's easy to transpile 5116 if this and this.is_number: 5117 this = exp.Literal.string(this.to_py()) 5118 elif this and this.is_string: 5119 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5120 if parts and unit: 5121 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5122 unit = None 5123 self._retreat(self._index - 1) 5124 5125 if len(parts) == 1: 5126 this = exp.Literal.string(parts[0][0]) 5127 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5128 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5129 unit = self.expression( 5130 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5131 ) 5132 5133 interval = self.expression(exp.Interval, this=this, unit=unit) 5134 5135 index = self._index 5136 self._match(TokenType.PLUS) 5137 5138 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5139 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5140 return self.expression( 5141 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5142 ) 5143 5144 self._retreat(index) 5145 return interval 5146 5147 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5148 this = self._parse_term() 5149 5150 while True: 5151 if self._match_set(self.BITWISE): 5152 this = self.expression( 5153 self.BITWISE[self._prev.token_type], 5154 this=this, 5155 expression=self._parse_term(), 5156 ) 5157 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5158 this = self.expression( 5159 exp.DPipe, 5160 this=this, 5161 expression=self._parse_term(), 5162 safe=not self.dialect.STRICT_STRING_CONCAT, 5163 ) 5164 elif self._match(TokenType.DQMARK): 5165 this = self.expression( 5166 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5167 ) 5168 elif self._match_pair(TokenType.LT, TokenType.LT): 5169 this = self.expression( 5170 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5171 ) 5172 elif self._match_pair(TokenType.GT, TokenType.GT): 5173 this = self.expression( 5174 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5175 ) 5176 else: 5177 break 5178 5179 return this 5180 5181 def _parse_term(self) -> t.Optional[exp.Expression]: 5182 this = self._parse_factor() 5183 5184 while self._match_set(self.TERM): 5185 klass = self.TERM[self._prev.token_type] 5186 comments = self._prev_comments 5187 expression = self._parse_factor() 5188 5189 this = self.expression(klass, this=this, comments=comments, expression=expression) 5190 5191 if isinstance(this, exp.Collate): 5192 expr = this.expression 5193 5194 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5195 # fallback to Identifier / Var 5196 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5197 ident = expr.this 5198 if 
isinstance(ident, exp.Identifier): 5199 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5200 5201 return this 5202 5203 def _parse_factor(self) -> t.Optional[exp.Expression]: 5204 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5205 this = parse_method() 5206 5207 while self._match_set(self.FACTOR): 5208 klass = self.FACTOR[self._prev.token_type] 5209 comments = self._prev_comments 5210 expression = parse_method() 5211 5212 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5213 self._retreat(self._index - 1) 5214 return this 5215 5216 this = self.expression(klass, this=this, comments=comments, expression=expression) 5217 5218 if isinstance(this, exp.Div): 5219 this.args["typed"] = self.dialect.TYPED_DIVISION 5220 this.args["safe"] = self.dialect.SAFE_DIVISION 5221 5222 return this 5223 5224 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5225 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5226 5227 def _parse_unary(self) -> t.Optional[exp.Expression]: 5228 if self._match_set(self.UNARY_PARSERS): 5229 return self.UNARY_PARSERS[self._prev.token_type](self) 5230 return self._parse_at_time_zone(self._parse_type()) 5231 5232 def _parse_type( 5233 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5234 ) -> t.Optional[exp.Expression]: 5235 interval = parse_interval and self._parse_interval() 5236 if interval: 5237 return interval 5238 5239 index = self._index 5240 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5241 5242 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5243 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5244 if isinstance(data_type, exp.Cast): 5245 # This constructor can contain ops directly after it, for instance struct unnesting: 5246 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5247 return self._parse_column_ops(data_type) 5248 5249 if data_type: 5250 index2 = self._index 5251 this = self._parse_primary() 5252 5253 if isinstance(this, exp.Literal): 5254 literal = this.name 5255 this = self._parse_column_ops(this) 5256 5257 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5258 if parser: 5259 return parser(self, this, data_type) 5260 5261 if ( 5262 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5263 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5264 and TIME_ZONE_RE.search(literal) 5265 ): 5266 data_type = exp.DataType.build("TIMESTAMPTZ") 5267 5268 return self.expression(exp.Cast, this=this, to=data_type) 5269 5270 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5271 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5272 # 5273 # If the index difference here is greater than 1, that means the parser itself must have 5274 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5275 # 5276 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5277 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5278 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5279 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5280 # 5281 # In these cases, we don't really want to return the converted type, but instead retreat 5282 # and try to parse a Column or Identifier in the section below. 
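        #
        # A minimal usage sketch of the constructor canonicalization described earlier
        # in this method, assuming only the public sqlglot.parse_one API (the SQL
        # string and dialect are illustrative):
        #
        #   import sqlglot
        #   from sqlglot import exp
        #
        #   # BigQuery's inline constructor STRUCT<...>(...) is expected to come back
        #   # as CAST(STRUCT(1, 'foo') AS STRUCT<a INT64, b STRING>)
        #   cast = sqlglot.parse_one(
        #       "SELECT STRUCT<a INT64, b STRING>(1, 'foo')", read="bigquery"
        #   ).find(exp.Cast)
        #   assert cast is not None and cast.to.is_type("struct")
        #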
5283 if data_type.expressions and index2 - index > 1: 5284 self._retreat(index2) 5285 return self._parse_column_ops(data_type) 5286 5287 self._retreat(index) 5288 5289 if fallback_to_identifier: 5290 return self._parse_id_var() 5291 5292 this = self._parse_column() 5293 return this and self._parse_column_ops(this) 5294 5295 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5296 this = self._parse_type() 5297 if not this: 5298 return None 5299 5300 if isinstance(this, exp.Column) and not this.table: 5301 this = exp.var(this.name.upper()) 5302 5303 return self.expression( 5304 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5305 ) 5306 5307 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5308 type_name = identifier.name 5309 5310 while self._match(TokenType.DOT): 5311 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5312 5313 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5314 5315 def _parse_types( 5316 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5317 ) -> t.Optional[exp.Expression]: 5318 index = self._index 5319 5320 this: t.Optional[exp.Expression] = None 5321 prefix = self._match_text_seq("SYSUDTLIB", ".") 5322 5323 if self._match_set(self.TYPE_TOKENS): 5324 type_token = self._prev.token_type 5325 else: 5326 type_token = None 5327 identifier = allow_identifiers and self._parse_id_var( 5328 any_token=False, tokens=(TokenType.VAR,) 5329 ) 5330 if isinstance(identifier, exp.Identifier): 5331 try: 5332 tokens = self.dialect.tokenize(identifier.name) 5333 except TokenError: 5334 tokens = None 5335 5336 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5337 type_token = tokens[0].token_type 5338 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5339 this = self._parse_user_defined_type(identifier) 5340 else: 5341 self._retreat(self._index - 1) 5342 return None 5343 else: 5344 return None 5345 5346 if type_token == TokenType.PSEUDO_TYPE: 5347 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5348 5349 if type_token == TokenType.OBJECT_IDENTIFIER: 5350 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5351 5352 # https://materialize.com/docs/sql/types/map/ 5353 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5354 key_type = self._parse_types( 5355 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5356 ) 5357 if not self._match(TokenType.FARROW): 5358 self._retreat(index) 5359 return None 5360 5361 value_type = self._parse_types( 5362 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5363 ) 5364 if not self._match(TokenType.R_BRACKET): 5365 self._retreat(index) 5366 return None 5367 5368 return exp.DataType( 5369 this=exp.DataType.Type.MAP, 5370 expressions=[key_type, value_type], 5371 nested=True, 5372 prefix=prefix, 5373 ) 5374 5375 nested = type_token in self.NESTED_TYPE_TOKENS 5376 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5377 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5378 expressions = None 5379 maybe_func = False 5380 5381 if self._match(TokenType.L_PAREN): 5382 if is_struct: 5383 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5384 elif nested: 5385 expressions = self._parse_csv( 5386 lambda: self._parse_types( 5387 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5388 ) 5389 ) 5390 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5391 this = expressions[0] 5392 this.set("nullable", True) 5393 self._match_r_paren() 5394 return this 5395 elif type_token in self.ENUM_TYPE_TOKENS: 5396 expressions = self._parse_csv(self._parse_equality) 5397 elif is_aggregate: 5398 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5399 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5400 ) 5401 if not func_or_ident: 5402 return None 5403 expressions = [func_or_ident] 5404 if self._match(TokenType.COMMA): 5405 expressions.extend( 5406 self._parse_csv( 5407 lambda: self._parse_types( 5408 check_func=check_func, 5409 schema=schema, 5410 allow_identifiers=allow_identifiers, 5411 ) 5412 ) 5413 ) 5414 else: 5415 expressions = self._parse_csv(self._parse_type_size) 5416 5417 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5418 if type_token == TokenType.VECTOR and len(expressions) == 2: 5419 expressions = self._parse_vector_expressions(expressions) 5420 5421 if not self._match(TokenType.R_PAREN): 5422 self._retreat(index) 5423 return None 5424 5425 maybe_func = True 5426 5427 values: t.Optional[t.List[exp.Expression]] = None 5428 5429 if nested and self._match(TokenType.LT): 5430 if is_struct: 5431 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5432 else: 5433 expressions = self._parse_csv( 5434 lambda: self._parse_types( 5435 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5436 ) 5437 ) 5438 5439 if not self._match(TokenType.GT): 5440 self.raise_error("Expecting >") 5441 5442 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5443 values = self._parse_csv(self._parse_assignment) 5444 if not values and is_struct: 5445 values = None 5446 self._retreat(self._index - 1) 5447 else: 5448 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5449 5450 if type_token in self.TIMESTAMPS: 5451 if self._match_text_seq("WITH", "TIME", "ZONE"): 5452 maybe_func = False 5453 tz_type = ( 5454 exp.DataType.Type.TIMETZ 5455 if type_token in self.TIMES 5456 else exp.DataType.Type.TIMESTAMPTZ 5457 ) 5458 this = exp.DataType(this=tz_type, expressions=expressions) 5459 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5460 maybe_func = False 5461 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5462 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5463 maybe_func = False 5464 elif type_token == TokenType.INTERVAL: 5465 unit = self._parse_var(upper=True) 5466 if unit: 5467 if self._match_text_seq("TO"): 5468 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5469 5470 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5471 else: 5472 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5473 elif type_token == TokenType.VOID: 5474 this = exp.DataType(this=exp.DataType.Type.NULL) 5475 5476 if maybe_func and check_func: 5477 index2 = self._index 5478 peek = self._parse_string() 5479 5480 if not peek: 5481 self._retreat(index) 5482 return None 5483 5484 self._retreat(index2) 5485 5486 if not this: 5487 if self._match_text_seq("UNSIGNED"): 5488 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5489 if not unsigned_type_token: 5490 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5491 5492 type_token = unsigned_type_token or type_token 5493 5494 this = exp.DataType( 5495 this=exp.DataType.Type[type_token.value], 5496 
expressions=expressions, 5497 nested=nested, 5498 prefix=prefix, 5499 ) 5500 5501 # Empty arrays/structs are allowed 5502 if values is not None: 5503 cls = exp.Struct if is_struct else exp.Array 5504 this = exp.cast(cls(expressions=values), this, copy=False) 5505 5506 elif expressions: 5507 this.set("expressions", expressions) 5508 5509 # https://materialize.com/docs/sql/types/list/#type-name 5510 while self._match(TokenType.LIST): 5511 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5512 5513 index = self._index 5514 5515 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5516 matched_array = self._match(TokenType.ARRAY) 5517 5518 while self._curr: 5519 datatype_token = self._prev.token_type 5520 matched_l_bracket = self._match(TokenType.L_BRACKET) 5521 5522 if (not matched_l_bracket and not matched_array) or ( 5523 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5524 ): 5525 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5526 # not to be confused with the fixed size array parsing 5527 break 5528 5529 matched_array = False 5530 values = self._parse_csv(self._parse_assignment) or None 5531 if ( 5532 values 5533 and not schema 5534 and ( 5535 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5536 ) 5537 ): 5538 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5539 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5540 self._retreat(index) 5541 break 5542 5543 this = exp.DataType( 5544 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5545 ) 5546 self._match(TokenType.R_BRACKET) 5547 5548 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5549 converter = self.TYPE_CONVERTERS.get(this.this) 5550 if converter: 5551 this = converter(t.cast(exp.DataType, this)) 5552 5553 return this 5554 5555 def _parse_vector_expressions( 5556 self, expressions: t.List[exp.Expression] 5557 ) -> t.List[exp.Expression]: 5558 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5559 5560 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5561 index = self._index 5562 5563 if ( 5564 self._curr 5565 and self._next 5566 and self._curr.token_type in self.TYPE_TOKENS 5567 and self._next.token_type in self.TYPE_TOKENS 5568 ): 5569 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5570 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5571 this = self._parse_id_var() 5572 else: 5573 this = ( 5574 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5575 or self._parse_id_var() 5576 ) 5577 5578 self._match(TokenType.COLON) 5579 5580 if ( 5581 type_required 5582 and not isinstance(this, exp.DataType) 5583 and not self._match_set(self.TYPE_TOKENS, advance=False) 5584 ): 5585 self._retreat(index) 5586 return self._parse_types() 5587 5588 return self._parse_column_def(this) 5589 5590 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5591 if not self._match_text_seq("AT", "TIME", "ZONE"): 5592 return this 5593 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5594 5595 def _parse_column(self) -> t.Optional[exp.Expression]: 5596 this = self._parse_column_reference() 5597 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5598 5599 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5600 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5601 5602 return column 5603 5604 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5605 this = self._parse_field() 5606 if ( 5607 not this 5608 and self._match(TokenType.VALUES, advance=False) 5609 and self.VALUES_FOLLOWED_BY_PAREN 5610 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5611 ): 5612 this = self._parse_id_var() 5613 5614 if isinstance(this, exp.Identifier): 5615 # We bubble up comments from the Identifier to the Column 5616 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5617 5618 return this 5619 5620 def _parse_colon_as_variant_extract( 5621 self, this: t.Optional[exp.Expression] 5622 ) -> t.Optional[exp.Expression]: 5623 casts = [] 5624 json_path = [] 5625 escape = None 5626 5627 while self._match(TokenType.COLON): 5628 start_index = self._index 5629 5630 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5631 path = self._parse_column_ops( 5632 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5633 ) 5634 5635 # The cast :: operator has a lower precedence than the extraction operator :, so 5636 # we rearrange the AST appropriately to avoid casting the JSON path 5637 while isinstance(path, exp.Cast): 5638 casts.append(path.to) 5639 path = path.this 5640 5641 if casts: 5642 dcolon_offset = next( 5643 i 5644 for i, t in enumerate(self._tokens[start_index:]) 5645 if t.token_type == TokenType.DCOLON 5646 ) 5647 end_token = self._tokens[start_index + dcolon_offset - 1] 5648 else: 5649 end_token = self._prev 5650 5651 if path: 5652 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5653 # it'll roundtrip to a string literal in GET_PATH 5654 if isinstance(path, exp.Identifier) and path.quoted: 5655 escape = True 5656 5657 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5658 5659 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5660 # Databricks transforms it back to the colon/dot notation 5661 if json_path: 5662 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5663 5664 if json_path_expr: 5665 json_path_expr.set("escape", escape) 5666 5667 this = self.expression( 5668 exp.JSONExtract, 5669 this=this, 5670 expression=json_path_expr, 5671 variant_extract=True, 5672 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5673 ) 5674 5675 while casts: 5676 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5677 5678 return this 5679 5680 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5681 return self._parse_types() 5682 5683 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5684 this = self._parse_bracket(this) 5685 5686 while self._match_set(self.COLUMN_OPERATORS): 5687 op_token = self._prev.token_type 5688 op = self.COLUMN_OPERATORS.get(op_token) 5689 5690 if op_token in self.CAST_COLUMN_OPERATORS: 5691 field = self._parse_dcolon() 5692 if not field: 5693 self.raise_error("Expected type") 5694 elif op and self._curr: 5695 field = self._parse_column_reference() or self._parse_bracket() 5696 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5697 field = self._parse_column_ops(field) 5698 else: 5699 field = self._parse_field(any_token=True, anonymous_func=True) 5700 5701 # Function calls can be qualified, e.g., x.y.FOO() 5702 # This converts the final AST to a series of Dots leading to the function call 5703 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5704 if isinstance(field, (exp.Func, exp.Window)) and this: 5705 this = this.transform( 5706 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5707 ) 5708 5709 if op: 5710 this = op(self, this, field) 5711 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5712 this = self.expression( 5713 exp.Column, 5714 comments=this.comments, 5715 this=field, 5716 table=this.this, 5717 db=this.args.get("table"), 5718 catalog=this.args.get("db"), 5719 ) 5720 elif isinstance(field, exp.Window): 5721 # Move the exp.Dot's to the window's function 5722 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5723 field.set("this", window_func) 5724 this = field 5725 else: 5726 this = self.expression(exp.Dot, this=this, expression=field) 5727 5728 if field and field.comments: 5729 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5730 5731 this = self._parse_bracket(this) 5732 5733 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5734 5735 def _parse_paren(self) -> t.Optional[exp.Expression]: 5736 if not self._match(TokenType.L_PAREN): 5737 return None 5738 5739 comments = self._prev_comments 5740 query = self._parse_select() 5741 5742 if query: 5743 expressions = [query] 5744 else: 5745 expressions = self._parse_expressions() 5746 5747 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5748 5749 if not this and self._match(TokenType.R_PAREN, advance=False): 5750 this = self.expression(exp.Tuple) 5751 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5752 this = self._parse_subquery(this=this, parse_alias=False) 5753 elif isinstance(this, exp.Subquery): 5754 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5755 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5756 this = self.expression(exp.Tuple, expressions=expressions) 5757 else: 5758 this = self.expression(exp.Paren, this=this) 5759 5760 if this: 5761 this.add_comments(comments) 5762 5763 self._match_r_paren(expression=this) 5764 return this 5765 5766 def _parse_primary(self) -> t.Optional[exp.Expression]: 5767 if self._match_set(self.PRIMARY_PARSERS): 5768 token_type = self._prev.token_type 5769 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5770 5771 if token_type == TokenType.STRING: 5772 expressions = [primary] 5773 while self._match(TokenType.STRING): 5774 expressions.append(exp.Literal.string(self._prev.text)) 5775 5776 if len(expressions) > 1: 5777 return self.expression(exp.Concat, expressions=expressions) 5778 5779 return primary 5780 5781 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5782 return exp.Literal.number(f"0.{self._prev.text}") 5783 5784 return self._parse_paren() 5785 5786 def _parse_field( 5787 self, 5788 any_token: bool = False, 5789 tokens: t.Optional[t.Collection[TokenType]] = None, 5790 anonymous_func: bool = False, 5791 ) -> t.Optional[exp.Expression]: 5792 if anonymous_func: 5793 field = ( 5794 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5795 or self._parse_primary() 5796 ) 5797 else: 5798 field = self._parse_primary() or self._parse_function( 5799 anonymous=anonymous_func, any_token=any_token 5800 ) 5801 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5802 5803 def _parse_function( 5804 self, 5805 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5806 anonymous: bool = False, 5807 optional_parens: bool = True, 5808 any_token: bool = False, 5809 ) -> t.Optional[exp.Expression]: 5810 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5811 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5812 fn_syntax = False 5813 if ( 5814 self._match(TokenType.L_BRACE, advance=False) 5815 and self._next 5816 and self._next.text.upper() == "FN" 5817 ): 5818 self._advance(2) 5819 fn_syntax = True 5820 5821 func = self._parse_function_call( 5822 functions=functions, 5823 anonymous=anonymous, 5824 optional_parens=optional_parens, 5825 any_token=any_token, 5826 ) 5827 5828 if fn_syntax: 5829 self._match(TokenType.R_BRACE) 5830 5831 return func 5832 5833 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5834 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5835 5836 def _parse_function_call( 5837 self, 5838 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5839 anonymous: bool = False, 5840 optional_parens: bool = True, 5841 any_token: bool = False, 5842 ) -> t.Optional[exp.Expression]: 5843 if not self._curr: 5844 return None 5845 5846 comments = self._curr.comments 5847 prev = self._prev 5848 token = self._curr 5849 token_type = self._curr.token_type 5850 this = self._curr.text 5851 upper = this.upper() 5852 5853 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5854 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5855 self._advance() 5856 return self._parse_window(parser(self)) 5857 5858 if not self._next or self._next.token_type != TokenType.L_PAREN: 5859 if optional_parens and 
token_type in self.NO_PAREN_FUNCTIONS: 5860 self._advance() 5861 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5862 5863 return None 5864 5865 if any_token: 5866 if token_type in self.RESERVED_TOKENS: 5867 return None 5868 elif token_type not in self.FUNC_TOKENS: 5869 return None 5870 5871 self._advance(2) 5872 5873 parser = self.FUNCTION_PARSERS.get(upper) 5874 if parser and not anonymous: 5875 this = parser(self) 5876 else: 5877 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5878 5879 if subquery_predicate: 5880 expr = None 5881 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5882 expr = self._parse_select() 5883 self._match_r_paren() 5884 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5885 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5886 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5887 self._advance(-1) 5888 expr = self._parse_bitwise() 5889 5890 if expr: 5891 return self.expression(subquery_predicate, comments=comments, this=expr) 5892 5893 if functions is None: 5894 functions = self.FUNCTIONS 5895 5896 function = functions.get(upper) 5897 known_function = function and not anonymous 5898 5899 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5900 args = self._parse_function_args(alias) 5901 5902 post_func_comments = self._curr and self._curr.comments 5903 if known_function and post_func_comments: 5904 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5905 # call we'll construct it as exp.Anonymous, even if it's "known" 5906 if any( 5907 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5908 for comment in post_func_comments 5909 ): 5910 known_function = False 5911 5912 if alias and known_function: 5913 args = self._kv_to_prop_eq(args) 5914 5915 if known_function: 5916 func_builder = t.cast(t.Callable, function) 5917 5918 if "dialect" in func_builder.__code__.co_varnames: 5919 func = func_builder(args, dialect=self.dialect) 5920 else: 5921 func = func_builder(args) 5922 5923 func = self.validate_expression(func, args) 5924 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5925 func.meta["name"] = this 5926 5927 this = func 5928 else: 5929 if token_type == TokenType.IDENTIFIER: 5930 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5931 5932 this = self.expression(exp.Anonymous, this=this, expressions=args) 5933 this = this.update_positions(token) 5934 5935 if isinstance(this, exp.Expression): 5936 this.add_comments(comments) 5937 5938 self._match_r_paren(this) 5939 return self._parse_window(this) 5940 5941 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5942 return expression 5943 5944 def _kv_to_prop_eq( 5945 self, expressions: t.List[exp.Expression], parse_map: bool = False 5946 ) -> t.List[exp.Expression]: 5947 transformed = [] 5948 5949 for index, e in enumerate(expressions): 5950 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5951 if isinstance(e, exp.Alias): 5952 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5953 5954 if not isinstance(e, exp.PropertyEQ): 5955 e = self.expression( 5956 exp.PropertyEQ, 5957 this=e.this if parse_map else exp.to_identifier(e.this.name), 5958 expression=e.expression, 5959 ) 5960 5961 if isinstance(e.this, exp.Column): 5962 e.this.replace(e.this.this) 5963 else: 5964 e = self._to_prop_eq(e, index) 5965 5966 transformed.append(e) 5967 5968 return transformed 5969 5970 def 
_parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5971 return self._parse_statement() 5972 5973 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5974 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5975 5976 def _parse_user_defined_function( 5977 self, kind: t.Optional[TokenType] = None 5978 ) -> t.Optional[exp.Expression]: 5979 this = self._parse_table_parts(schema=True) 5980 5981 if not self._match(TokenType.L_PAREN): 5982 return this 5983 5984 expressions = self._parse_csv(self._parse_function_parameter) 5985 self._match_r_paren() 5986 return self.expression( 5987 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5988 ) 5989 5990 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5991 literal = self._parse_primary() 5992 if literal: 5993 return self.expression(exp.Introducer, this=token.text, expression=literal) 5994 5995 return self._identifier_expression(token) 5996 5997 def _parse_session_parameter(self) -> exp.SessionParameter: 5998 kind = None 5999 this = self._parse_id_var() or self._parse_primary() 6000 6001 if this and self._match(TokenType.DOT): 6002 kind = this.name 6003 this = self._parse_var() or self._parse_primary() 6004 6005 return self.expression(exp.SessionParameter, this=this, kind=kind) 6006 6007 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6008 return self._parse_id_var() 6009 6010 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6011 index = self._index 6012 6013 if self._match(TokenType.L_PAREN): 6014 expressions = t.cast( 6015 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6016 ) 6017 6018 if not self._match(TokenType.R_PAREN): 6019 self._retreat(index) 6020 else: 6021 expressions = [self._parse_lambda_arg()] 6022 6023 if self._match_set(self.LAMBDAS): 6024 return self.LAMBDAS[self._prev.token_type](self, expressions) 6025 6026 self._retreat(index) 6027 6028 this: t.Optional[exp.Expression] 6029 6030 if self._match(TokenType.DISTINCT): 6031 this = self.expression( 6032 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6033 ) 6034 else: 6035 this = self._parse_select_or_expression(alias=alias) 6036 6037 return self._parse_limit( 6038 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6039 ) 6040 6041 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6042 index = self._index 6043 if not self._match(TokenType.L_PAREN): 6044 return this 6045 6046 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6047 # expr can be of both types 6048 if self._match_set(self.SELECT_START_TOKENS): 6049 self._retreat(index) 6050 return this 6051 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6052 self._match_r_paren() 6053 return self.expression(exp.Schema, this=this, expressions=args) 6054 6055 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6056 return self._parse_column_def(self._parse_field(any_token=True)) 6057 6058 def _parse_column_def( 6059 self, this: t.Optional[exp.Expression], computed_column: bool = True 6060 ) -> t.Optional[exp.Expression]: 6061 # column defs are not really columns, they're identifiers 6062 if isinstance(this, exp.Column): 6063 this = this.this 6064 6065 if not computed_column: 6066 self._match(TokenType.ALIAS) 6067 6068 kind = self._parse_types(schema=True) 6069 6070 if self._match_text_seq("FOR", "ORDINALITY"): 6071 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6072 6073 constraints: t.List[exp.Expression] = [] 6074 6075 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6076 ("ALIAS", "MATERIALIZED") 6077 ): 6078 persisted = self._prev.text.upper() == "MATERIALIZED" 6079 constraint_kind = exp.ComputedColumnConstraint( 6080 this=self._parse_assignment(), 6081 persisted=persisted or self._match_text_seq("PERSISTED"), 6082 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6083 ) 6084 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6085 elif ( 6086 kind 6087 and self._match(TokenType.ALIAS, advance=False) 6088 and ( 6089 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6090 or (self._next and self._next.token_type == TokenType.L_PAREN) 6091 ) 6092 ): 6093 self._advance() 6094 constraints.append( 6095 self.expression( 6096 exp.ColumnConstraint, 6097 kind=exp.ComputedColumnConstraint( 6098 this=self._parse_disjunction(), 6099 persisted=self._match_texts(("STORED", "VIRTUAL")) 6100 and self._prev.text.upper() == "STORED", 6101 ), 6102 ) 6103 ) 6104 6105 while True: 6106 constraint = self._parse_column_constraint() 6107 if not constraint: 6108 break 6109 constraints.append(constraint) 6110 6111 if not kind and not constraints: 6112 return this 6113 6114 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6115 6116 def _parse_auto_increment( 6117 self, 6118 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6119 start = None 6120 increment = None 6121 order = None 6122 6123 if self._match(TokenType.L_PAREN, advance=False): 6124 args = self._parse_wrapped_csv(self._parse_bitwise) 6125 start = seq_get(args, 0) 6126 increment = seq_get(args, 1) 6127 elif self._match_text_seq("START"): 6128 start = self._parse_bitwise() 6129 self._match_text_seq("INCREMENT") 6130 increment = self._parse_bitwise() 6131 if self._match_text_seq("ORDER"): 6132 order = True 6133 elif self._match_text_seq("NOORDER"): 6134 order = False 6135 6136 if start and increment: 6137 return exp.GeneratedAsIdentityColumnConstraint( 6138 start=start, increment=increment, this=False, order=order 6139 ) 6140 6141 return exp.AutoIncrementColumnConstraint() 6142 6143 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6144 if not self._match_text_seq("REFRESH"): 6145 self._retreat(self._index - 1) 6146 return None 6147 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6148 6149 def _parse_compress(self) -> exp.CompressColumnConstraint: 6150 if 
self._match(TokenType.L_PAREN, advance=False): 6151 return self.expression( 6152 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6153 ) 6154 6155 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6156 6157 def _parse_generated_as_identity( 6158 self, 6159 ) -> ( 6160 exp.GeneratedAsIdentityColumnConstraint 6161 | exp.ComputedColumnConstraint 6162 | exp.GeneratedAsRowColumnConstraint 6163 ): 6164 if self._match_text_seq("BY", "DEFAULT"): 6165 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6166 this = self.expression( 6167 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6168 ) 6169 else: 6170 self._match_text_seq("ALWAYS") 6171 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6172 6173 self._match(TokenType.ALIAS) 6174 6175 if self._match_text_seq("ROW"): 6176 start = self._match_text_seq("START") 6177 if not start: 6178 self._match(TokenType.END) 6179 hidden = self._match_text_seq("HIDDEN") 6180 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6181 6182 identity = self._match_text_seq("IDENTITY") 6183 6184 if self._match(TokenType.L_PAREN): 6185 if self._match(TokenType.START_WITH): 6186 this.set("start", self._parse_bitwise()) 6187 if self._match_text_seq("INCREMENT", "BY"): 6188 this.set("increment", self._parse_bitwise()) 6189 if self._match_text_seq("MINVALUE"): 6190 this.set("minvalue", self._parse_bitwise()) 6191 if self._match_text_seq("MAXVALUE"): 6192 this.set("maxvalue", self._parse_bitwise()) 6193 6194 if self._match_text_seq("CYCLE"): 6195 this.set("cycle", True) 6196 elif self._match_text_seq("NO", "CYCLE"): 6197 this.set("cycle", False) 6198 6199 if not identity: 6200 this.set("expression", self._parse_range()) 6201 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6202 args = self._parse_csv(self._parse_bitwise) 6203 this.set("start", seq_get(args, 0)) 6204 this.set("increment", seq_get(args, 1)) 6205 6206 self._match_r_paren() 6207 6208 return this 6209 6210 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6211 self._match_text_seq("LENGTH") 6212 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6213 6214 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6215 if self._match_text_seq("NULL"): 6216 return self.expression(exp.NotNullColumnConstraint) 6217 if self._match_text_seq("CASESPECIFIC"): 6218 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6219 if self._match_text_seq("FOR", "REPLICATION"): 6220 return self.expression(exp.NotForReplicationColumnConstraint) 6221 6222 # Unconsume the `NOT` token 6223 self._retreat(self._index - 1) 6224 return None 6225 6226 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6227 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6228 6229 procedure_option_follows = ( 6230 self._match(TokenType.WITH, advance=False) 6231 and self._next 6232 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6233 ) 6234 6235 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6236 return self.expression( 6237 exp.ColumnConstraint, 6238 this=this, 6239 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6240 ) 6241 6242 return this 6243 6244 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6245 if not self._match(TokenType.CONSTRAINT): 6246 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6247 6248 return self.expression( 6249 exp.Constraint, 6250 this=self._parse_id_var(), 6251 expressions=self._parse_unnamed_constraints(), 6252 ) 6253 6254 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6255 constraints = [] 6256 while True: 6257 constraint = self._parse_unnamed_constraint() or self._parse_function() 6258 if not constraint: 6259 break 6260 constraints.append(constraint) 6261 6262 return constraints 6263 6264 def _parse_unnamed_constraint( 6265 self, constraints: t.Optional[t.Collection[str]] = None 6266 ) -> t.Optional[exp.Expression]: 6267 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6268 constraints or self.CONSTRAINT_PARSERS 6269 ): 6270 return None 6271 6272 constraint = self._prev.text.upper() 6273 if constraint not in self.CONSTRAINT_PARSERS: 6274 self.raise_error(f"No parser found for schema constraint {constraint}.") 6275 6276 return self.CONSTRAINT_PARSERS[constraint](self) 6277 6278 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6279 return self._parse_id_var(any_token=False) 6280 6281 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6282 self._match_texts(("KEY", "INDEX")) 6283 return self.expression( 6284 exp.UniqueColumnConstraint, 6285 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6286 this=self._parse_schema(self._parse_unique_key()), 6287 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6288 on_conflict=self._parse_on_conflict(), 6289 options=self._parse_key_constraint_options(), 6290 ) 6291 6292 def _parse_key_constraint_options(self) -> t.List[str]: 6293 options = [] 6294 while True: 6295 if not self._curr: 6296 break 6297 6298 if self._match(TokenType.ON): 6299 action = None 6300 on = self._advance_any() and self._prev.text 6301 6302 if self._match_text_seq("NO", "ACTION"): 6303 action = "NO ACTION" 6304 elif self._match_text_seq("CASCADE"): 6305 action = "CASCADE" 6306 elif self._match_text_seq("RESTRICT"): 6307 action = "RESTRICT" 6308 elif self._match_pair(TokenType.SET, TokenType.NULL): 6309 action = "SET NULL" 6310 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6311 action = "SET DEFAULT" 6312 else: 6313 self.raise_error("Invalid key constraint") 6314 6315 options.append(f"ON {on} {action}") 6316 else: 6317 var = self._parse_var_from_options( 6318 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6319 ) 6320 if not var: 6321 break 6322 options.append(var.name) 6323 6324 return options 6325 6326 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6327 if match and not self._match(TokenType.REFERENCES): 6328 return None 6329 6330 expressions = None 6331 this = self._parse_table(schema=True) 6332 options = self._parse_key_constraint_options() 6333 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6334 6335 def _parse_foreign_key(self) -> exp.ForeignKey: 6336 expressions = ( 6337 self._parse_wrapped_id_vars() 6338 if not self._match(TokenType.REFERENCES, advance=False) 6339 else None 6340 ) 6341 reference = self._parse_references() 6342 on_options = {} 6343 6344 while self._match(TokenType.ON): 6345 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6346 self.raise_error("Expected DELETE or UPDATE") 6347 6348 kind = self._prev.text.lower() 6349 6350 if self._match_text_seq("NO", "ACTION"): 6351 action = "NO ACTION" 6352 elif self._match(TokenType.SET): 6353 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6354 action = "SET " + self._prev.text.upper() 6355 else: 6356 self._advance() 6357 action = self._prev.text.upper() 6358 6359 on_options[kind] = action 6360 6361 return self.expression( 6362 exp.ForeignKey, 6363 expressions=expressions, 6364 reference=reference, 6365 options=self._parse_key_constraint_options(), 6366 **on_options, # type: ignore 6367 ) 6368 6369 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6370 return self._parse_ordered() or self._parse_field() 6371 6372 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6373 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6374 self._retreat(self._index - 1) 6375 return None 6376 6377 id_vars = self._parse_wrapped_id_vars() 6378 return self.expression( 6379 exp.PeriodForSystemTimeConstraint, 6380 this=seq_get(id_vars, 0), 6381 expression=seq_get(id_vars, 1), 6382 ) 6383 6384 def _parse_primary_key( 6385 self, wrapped_optional: bool = False, in_props: bool = False 6386 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6387 desc = ( 6388 self._match_set((TokenType.ASC, TokenType.DESC)) 6389 and self._prev.token_type == TokenType.DESC 6390 ) 6391 6392 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6393 return self.expression( 6394 exp.PrimaryKeyColumnConstraint, 6395 desc=desc, 6396 options=self._parse_key_constraint_options(), 6397 ) 6398 6399 expressions = self._parse_wrapped_csv( 6400 self._parse_primary_key_part, optional=wrapped_optional 6401 ) 6402 6403 return self.expression( 6404 exp.PrimaryKey, 6405 expressions=expressions, 6406 include=self._parse_index_params(), 6407 options=self._parse_key_constraint_options(), 6408 ) 6409 6410 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6411 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6412 6413 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6414 """ 6415 Parses a datetime column in ODBC format. We parse the column into the corresponding 6416 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6417 same as we did for `DATE('yyyy-mm-dd')`. 
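        Example (a minimal sketch, assuming the public sqlglot.parse_one API and a
        dialect that recognizes the ODBC escape, e.g. T-SQL; the mapping of `d` to a
        Date expression follows ODBC_DATETIME_LITERALS):

            import sqlglot
            from sqlglot import exp

            node = sqlglot.parse_one("SELECT {d '2024-01-01'}", read="tsql")
            assert node.find(exp.Date) is not None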
6418 6419 Reference: 6420 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6421 """ 6422 self._match(TokenType.VAR) 6423 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6424 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6425 if not self._match(TokenType.R_BRACE): 6426 self.raise_error("Expected }") 6427 return expression 6428 6429 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6430 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6431 return this 6432 6433 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6434 map_token = seq_get(self._tokens, self._index - 2) 6435 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6436 else: 6437 parse_map = False 6438 6439 bracket_kind = self._prev.token_type 6440 if ( 6441 bracket_kind == TokenType.L_BRACE 6442 and self._curr 6443 and self._curr.token_type == TokenType.VAR 6444 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6445 ): 6446 return self._parse_odbc_datetime_literal() 6447 6448 expressions = self._parse_csv( 6449 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6450 ) 6451 6452 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6453 self.raise_error("Expected ]") 6454 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6455 self.raise_error("Expected }") 6456 6457 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6458 if bracket_kind == TokenType.L_BRACE: 6459 this = self.expression( 6460 exp.Struct, 6461 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6462 ) 6463 elif not this: 6464 this = build_array_constructor( 6465 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6466 ) 6467 else: 6468 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6469 if constructor_type: 6470 return build_array_constructor( 6471 constructor_type, 6472 args=expressions, 6473 bracket_kind=bracket_kind, 6474 dialect=self.dialect, 6475 ) 6476 6477 expressions = apply_index_offset( 6478 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6479 ) 6480 this = self.expression( 6481 exp.Bracket, 6482 this=this, 6483 expressions=expressions, 6484 comments=this.pop_comments(), 6485 ) 6486 6487 self._add_comments(this) 6488 return self._parse_bracket(this) 6489 6490 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6491 if self._match(TokenType.COLON): 6492 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6493 return this 6494 6495 def _parse_case(self) -> t.Optional[exp.Expression]: 6496 if self._match(TokenType.DOT, advance=False): 6497 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6498 self._retreat(self._index - 1) 6499 return None 6500 6501 ifs = [] 6502 default = None 6503 6504 comments = self._prev_comments 6505 expression = self._parse_assignment() 6506 6507 while self._match(TokenType.WHEN): 6508 this = self._parse_assignment() 6509 self._match(TokenType.THEN) 6510 then = self._parse_assignment() 6511 ifs.append(self.expression(exp.If, this=this, true=then)) 6512 6513 if self._match(TokenType.ELSE): 6514 default = self._parse_assignment() 6515 6516 if not self._match(TokenType.END): 6517 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6518 default 
= exp.column("interval") 6519 else: 6520 self.raise_error("Expected END after CASE", self._prev) 6521 6522 return self.expression( 6523 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6524 ) 6525 6526 def _parse_if(self) -> t.Optional[exp.Expression]: 6527 if self._match(TokenType.L_PAREN): 6528 args = self._parse_csv( 6529 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6530 ) 6531 this = self.validate_expression(exp.If.from_arg_list(args), args) 6532 self._match_r_paren() 6533 else: 6534 index = self._index - 1 6535 6536 if self.NO_PAREN_IF_COMMANDS and index == 0: 6537 return self._parse_as_command(self._prev) 6538 6539 condition = self._parse_assignment() 6540 6541 if not condition: 6542 self._retreat(index) 6543 return None 6544 6545 self._match(TokenType.THEN) 6546 true = self._parse_assignment() 6547 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6548 self._match(TokenType.END) 6549 this = self.expression(exp.If, this=condition, true=true, false=false) 6550 6551 return this 6552 6553 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6554 if not self._match_text_seq("VALUE", "FOR"): 6555 self._retreat(self._index - 1) 6556 return None 6557 6558 return self.expression( 6559 exp.NextValueFor, 6560 this=self._parse_column(), 6561 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6562 ) 6563 6564 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6565 this = self._parse_function() or self._parse_var_or_string(upper=True) 6566 6567 if self._match(TokenType.FROM): 6568 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6569 6570 if not self._match(TokenType.COMMA): 6571 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6572 6573 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6574 6575 def _parse_gap_fill(self) -> exp.GapFill: 6576 self._match(TokenType.TABLE) 6577 this = self._parse_table() 6578 6579 self._match(TokenType.COMMA) 6580 args = [this, *self._parse_csv(self._parse_lambda)] 6581 6582 gap_fill = exp.GapFill.from_arg_list(args) 6583 return self.validate_expression(gap_fill, args) 6584 6585 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6586 this = self._parse_assignment() 6587 6588 if not self._match(TokenType.ALIAS): 6589 if self._match(TokenType.COMMA): 6590 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6591 6592 self.raise_error("Expected AS after CAST") 6593 6594 fmt = None 6595 to = self._parse_types() 6596 6597 default = self._match(TokenType.DEFAULT) 6598 if default: 6599 default = self._parse_bitwise() 6600 self._match_text_seq("ON", "CONVERSION", "ERROR") 6601 6602 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6603 fmt_string = self._parse_string() 6604 fmt = self._parse_at_time_zone(fmt_string) 6605 6606 if not to: 6607 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6608 if to.this in exp.DataType.TEMPORAL_TYPES: 6609 this = self.expression( 6610 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6611 this=this, 6612 format=exp.Literal.string( 6613 format_time( 6614 fmt_string.this if fmt_string else "", 6615 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6616 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6617 ) 6618 ), 6619 safe=safe, 6620 ) 6621 6622 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6623 this.set("zone", 
fmt.args["zone"]) 6624 return this 6625 elif not to: 6626 self.raise_error("Expected TYPE after CAST") 6627 elif isinstance(to, exp.Identifier): 6628 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6629 elif to.this == exp.DataType.Type.CHAR: 6630 if self._match(TokenType.CHARACTER_SET): 6631 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6632 6633 return self.build_cast( 6634 strict=strict, 6635 this=this, 6636 to=to, 6637 format=fmt, 6638 safe=safe, 6639 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6640 default=default, 6641 ) 6642 6643 def _parse_string_agg(self) -> exp.GroupConcat: 6644 if self._match(TokenType.DISTINCT): 6645 args: t.List[t.Optional[exp.Expression]] = [ 6646 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6647 ] 6648 if self._match(TokenType.COMMA): 6649 args.extend(self._parse_csv(self._parse_assignment)) 6650 else: 6651 args = self._parse_csv(self._parse_assignment) # type: ignore 6652 6653 if self._match_text_seq("ON", "OVERFLOW"): 6654 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6655 if self._match_text_seq("ERROR"): 6656 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6657 else: 6658 self._match_text_seq("TRUNCATE") 6659 on_overflow = self.expression( 6660 exp.OverflowTruncateBehavior, 6661 this=self._parse_string(), 6662 with_count=( 6663 self._match_text_seq("WITH", "COUNT") 6664 or not self._match_text_seq("WITHOUT", "COUNT") 6665 ), 6666 ) 6667 else: 6668 on_overflow = None 6669 6670 index = self._index 6671 if not self._match(TokenType.R_PAREN) and args: 6672 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6673 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6674 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6675 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6676 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6677 6678 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6679 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6680 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6681 if not self._match_text_seq("WITHIN", "GROUP"): 6682 self._retreat(index) 6683 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6684 6685 # The corresponding match_r_paren will be called in parse_function (caller) 6686 self._match_l_paren() 6687 6688 return self.expression( 6689 exp.GroupConcat, 6690 this=self._parse_order(this=seq_get(args, 0)), 6691 separator=seq_get(args, 1), 6692 on_overflow=on_overflow, 6693 ) 6694 6695 def _parse_convert( 6696 self, strict: bool, safe: t.Optional[bool] = None 6697 ) -> t.Optional[exp.Expression]: 6698 this = self._parse_bitwise() 6699 6700 if self._match(TokenType.USING): 6701 to: t.Optional[exp.Expression] = self.expression( 6702 exp.CharacterSet, this=self._parse_var() 6703 ) 6704 elif self._match(TokenType.COMMA): 6705 to = self._parse_types() 6706 else: 6707 to = None 6708 6709 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6710 6711 def _parse_xml_table(self) -> exp.XMLTable: 6712 namespaces = None 6713 passing = None 6714 columns = None 6715 6716 if self._match_text_seq("XMLNAMESPACES", "("): 6717 namespaces = self._parse_xml_namespace() 6718 self._match_text_seq(")", ",") 6719 6720 this = self._parse_string() 6721 6722 if self._match_text_seq("PASSING"): 6723 # The BY VALUE keywords are optional and are provided for semantic clarity 6724 self._match_text_seq("BY", "VALUE") 6725 passing = self._parse_csv(self._parse_column) 6726 6727 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6728 6729 if self._match_text_seq("COLUMNS"): 6730 columns = self._parse_csv(self._parse_field_def) 6731 6732 return self.expression( 6733 exp.XMLTable, 6734 this=this, 6735 namespaces=namespaces, 6736 passing=passing, 6737 columns=columns, 6738 by_ref=by_ref, 6739 ) 6740 6741 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6742 namespaces = [] 6743 6744 while True: 6745 if self._match(TokenType.DEFAULT): 6746 uri = self._parse_string() 6747 else: 6748 uri = self._parse_alias(self._parse_string()) 6749 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6750 if not self._match(TokenType.COMMA): 6751 break 6752 6753 return namespaces 6754 6755 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6756 args = self._parse_csv(self._parse_assignment) 6757 6758 if len(args) < 3: 6759 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6760 6761 return self.expression(exp.DecodeCase, expressions=args) 6762 6763 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6764 self._match_text_seq("KEY") 6765 key = self._parse_column() 6766 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6767 self._match_text_seq("VALUE") 6768 value = self._parse_bitwise() 6769 6770 if not key and not value: 6771 return None 6772 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6773 6774 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6775 if not this or not self._match_text_seq("FORMAT", "JSON"): 6776 return this 6777 6778 return self.expression(exp.FormatJson, this=this) 6779 6780 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6781 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6782 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6783 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6784 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6785 else: 6786 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6787 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6788 6789 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6790 6791 if not empty and not error and not null: 6792 return None 6793 6794 return self.expression( 6795 exp.OnCondition, 6796 empty=empty, 6797 error=error, 6798 null=null, 6799 ) 6800 6801 def _parse_on_handling( 6802 self, on: str, *values: str 6803 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6804 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6805 for value in values: 6806 if self._match_text_seq(value, "ON", on): 6807 return f"{value} ON {on}" 6808 6809 index = self._index 6810 if self._match(TokenType.DEFAULT): 6811 default_value = self._parse_bitwise() 6812 if self._match_text_seq("ON", on): 6813 return default_value 6814 6815 self._retreat(index) 6816 6817 return None 6818 6819 @t.overload 6820 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6821 6822 @t.overload 6823 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6824 6825 def _parse_json_object(self, agg=False): 6826 star = self._parse_star() 6827 expressions = ( 6828 [star] 6829 if star 6830 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6831 ) 6832 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6833 6834 unique_keys = None 6835 if self._match_text_seq("WITH", "UNIQUE"): 6836 unique_keys = True 6837 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6838 unique_keys = False 6839 6840 self._match_text_seq("KEYS") 6841 6842 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6843 self._parse_type() 6844 ) 6845 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6846 6847 return self.expression( 6848 exp.JSONObjectAgg if agg else exp.JSONObject, 6849 expressions=expressions, 6850 null_handling=null_handling, 6851 unique_keys=unique_keys, 6852 return_type=return_type, 6853 encoding=encoding, 6854 ) 6855 6856 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6857 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6858 if not self._match_text_seq("NESTED"): 6859 this = self._parse_id_var() 6860 kind = self._parse_types(allow_identifiers=False) 6861 nested = None 6862 else: 6863 this = None 6864 kind = None 6865 nested = True 6866 6867 path = self._match_text_seq("PATH") and self._parse_string() 6868 nested_schema = nested and self._parse_json_schema() 6869 6870 return self.expression( 6871 exp.JSONColumnDef, 6872 this=this, 6873 kind=kind, 6874 path=path, 6875 nested_schema=nested_schema, 6876 ) 6877 6878 def _parse_json_schema(self) -> exp.JSONSchema: 6879 self._match_text_seq("COLUMNS") 6880 return self.expression( 6881 exp.JSONSchema, 6882 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6883 ) 6884 6885 def _parse_json_table(self) -> exp.JSONTable: 6886 this = self._parse_format_json(self._parse_bitwise()) 6887 path = self._match(TokenType.COMMA) and self._parse_string() 6888 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6889 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6890 schema = 
self._parse_json_schema() 6891 6892 return exp.JSONTable( 6893 this=this, 6894 schema=schema, 6895 path=path, 6896 error_handling=error_handling, 6897 empty_handling=empty_handling, 6898 ) 6899 6900 def _parse_match_against(self) -> exp.MatchAgainst: 6901 if self._match_text_seq("TABLE"): 6902 # parse SingleStore MATCH(TABLE ...) syntax 6903 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6904 expressions = [] 6905 table = self._parse_table() 6906 if table: 6907 expressions = [table] 6908 else: 6909 expressions = self._parse_csv(self._parse_column) 6910 6911 self._match_text_seq(")", "AGAINST", "(") 6912 6913 this = self._parse_string() 6914 6915 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6916 modifier = "IN NATURAL LANGUAGE MODE" 6917 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6918 modifier = f"{modifier} WITH QUERY EXPANSION" 6919 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6920 modifier = "IN BOOLEAN MODE" 6921 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6922 modifier = "WITH QUERY EXPANSION" 6923 else: 6924 modifier = None 6925 6926 return self.expression( 6927 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6928 ) 6929 6930 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6931 def _parse_open_json(self) -> exp.OpenJSON: 6932 this = self._parse_bitwise() 6933 path = self._match(TokenType.COMMA) and self._parse_string() 6934 6935 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6936 this = self._parse_field(any_token=True) 6937 kind = self._parse_types() 6938 path = self._parse_string() 6939 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6940 6941 return self.expression( 6942 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6943 ) 6944 6945 expressions = None 6946 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6947 self._match_l_paren() 6948 expressions = self._parse_csv(_parse_open_json_column_def) 6949 6950 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6951 6952 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6953 args = self._parse_csv(self._parse_bitwise) 6954 6955 if self._match(TokenType.IN): 6956 return self.expression( 6957 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6958 ) 6959 6960 if haystack_first: 6961 haystack = seq_get(args, 0) 6962 needle = seq_get(args, 1) 6963 else: 6964 haystack = seq_get(args, 1) 6965 needle = seq_get(args, 0) 6966 6967 return self.expression( 6968 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6969 ) 6970 6971 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6972 args = self._parse_csv(self._parse_table) 6973 return exp.JoinHint(this=func_name.upper(), expressions=args) 6974 6975 def _parse_substring(self) -> exp.Substring: 6976 # Postgres supports the form: substring(string [from int] [for int]) 6977 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6978 6979 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6980 6981 if self._match(TokenType.FROM): 6982 args.append(self._parse_bitwise()) 6983 if self._match(TokenType.FOR): 6984 if len(args) == 1: 6985 args.append(exp.Literal.number(1)) 6986 args.append(self._parse_bitwise()) 6987 6988 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6989 6990 def _parse_trim(self) 
-> exp.Trim: 6991 # https://www.w3resource.com/sql/character-functions/trim.php 6992 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6993 6994 position = None 6995 collation = None 6996 expression = None 6997 6998 if self._match_texts(self.TRIM_TYPES): 6999 position = self._prev.text.upper() 7000 7001 this = self._parse_bitwise() 7002 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7003 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7004 expression = self._parse_bitwise() 7005 7006 if invert_order: 7007 this, expression = expression, this 7008 7009 if self._match(TokenType.COLLATE): 7010 collation = self._parse_bitwise() 7011 7012 return self.expression( 7013 exp.Trim, this=this, position=position, expression=expression, collation=collation 7014 ) 7015 7016 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7017 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7018 7019 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7020 return self._parse_window(self._parse_id_var(), alias=True) 7021 7022 def _parse_respect_or_ignore_nulls( 7023 self, this: t.Optional[exp.Expression] 7024 ) -> t.Optional[exp.Expression]: 7025 if self._match_text_seq("IGNORE", "NULLS"): 7026 return self.expression(exp.IgnoreNulls, this=this) 7027 if self._match_text_seq("RESPECT", "NULLS"): 7028 return self.expression(exp.RespectNulls, this=this) 7029 return this 7030 7031 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7032 if self._match(TokenType.HAVING): 7033 self._match_texts(("MAX", "MIN")) 7034 max = self._prev.text.upper() != "MIN" 7035 return self.expression( 7036 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7037 ) 7038 7039 return this 7040 7041 def _parse_window( 7042 self, this: t.Optional[exp.Expression], alias: bool = False 7043 ) -> t.Optional[exp.Expression]: 7044 func = this 7045 comments = func.comments if isinstance(func, exp.Expression) else None 7046 7047 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7048 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7049 if self._match_text_seq("WITHIN", "GROUP"): 7050 order = self._parse_wrapped(self._parse_order) 7051 this = self.expression(exp.WithinGroup, this=this, expression=order) 7052 7053 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7054 self._match(TokenType.WHERE) 7055 this = self.expression( 7056 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7057 ) 7058 self._match_r_paren() 7059 7060 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7061 # Some dialects choose to implement and some do not. 7062 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7063 7064 # There is some code above in _parse_lambda that handles 7065 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7066 7067 # The below changes handle 7068 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
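        # e.g. both spellings below should produce the same tree, with the
        # IgnoreNulls node ending up wrapped around the whole aggregate before
        # the OVER clause is parsed:
        #   SELECT FIRST_VALUE(t.col IGNORE NULLS) OVER (ORDER BY t.id) FROM t
        #   SELECT FIRST_VALUE(t.col) IGNORE NULLS OVER (ORDER BY t.id) FROM t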
7069 7070 # Oracle allows both formats 7071 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7072 # and Snowflake chose to do the same for familiarity 7073 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7074 if isinstance(this, exp.AggFunc): 7075 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7076 7077 if ignore_respect and ignore_respect is not this: 7078 ignore_respect.replace(ignore_respect.this) 7079 this = self.expression(ignore_respect.__class__, this=this) 7080 7081 this = self._parse_respect_or_ignore_nulls(this) 7082 7083 # bigquery select from window x AS (partition by ...) 7084 if alias: 7085 over = None 7086 self._match(TokenType.ALIAS) 7087 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7088 return this 7089 else: 7090 over = self._prev.text.upper() 7091 7092 if comments and isinstance(func, exp.Expression): 7093 func.pop_comments() 7094 7095 if not self._match(TokenType.L_PAREN): 7096 return self.expression( 7097 exp.Window, 7098 comments=comments, 7099 this=this, 7100 alias=self._parse_id_var(False), 7101 over=over, 7102 ) 7103 7104 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7105 7106 first = self._match(TokenType.FIRST) 7107 if self._match_text_seq("LAST"): 7108 first = False 7109 7110 partition, order = self._parse_partition_and_order() 7111 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7112 7113 if kind: 7114 self._match(TokenType.BETWEEN) 7115 start = self._parse_window_spec() 7116 7117 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7118 exclude = ( 7119 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7120 if self._match_text_seq("EXCLUDE") 7121 else None 7122 ) 7123 7124 spec = self.expression( 7125 exp.WindowSpec, 7126 kind=kind, 7127 start=start["value"], 7128 start_side=start["side"], 7129 end=end.get("value"), 7130 end_side=end.get("side"), 7131 exclude=exclude, 7132 ) 7133 else: 7134 spec = None 7135 7136 self._match_r_paren() 7137 7138 window = self.expression( 7139 exp.Window, 7140 comments=comments, 7141 this=this, 7142 partition_by=partition, 7143 order=order, 7144 spec=spec, 7145 alias=window_alias, 7146 over=over, 7147 first=first, 7148 ) 7149 7150 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
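        # e.g. MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hired) OVER (PARTITION BY dept):
        # the aggregate KEEP (...) part has already been parsed into `window` above,
        # so a trailing OVER (...) is handled by recursing into _parse_window once more.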
7151 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7152 return self._parse_window(window, alias=alias) 7153 7154 return window 7155 7156 def _parse_partition_and_order( 7157 self, 7158 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7159 return self._parse_partition_by(), self._parse_order() 7160 7161 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7162 self._match(TokenType.BETWEEN) 7163 7164 return { 7165 "value": ( 7166 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7167 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7168 or self._parse_type() 7169 ), 7170 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7171 } 7172 7173 def _parse_alias( 7174 self, this: t.Optional[exp.Expression], explicit: bool = False 7175 ) -> t.Optional[exp.Expression]: 7176 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7177 # so this section tries to parse the clause version and if it fails, it treats the token 7178 # as an identifier (alias) 7179 if self._can_parse_limit_or_offset(): 7180 return this 7181 7182 any_token = self._match(TokenType.ALIAS) 7183 comments = self._prev_comments or [] 7184 7185 if explicit and not any_token: 7186 return this 7187 7188 if self._match(TokenType.L_PAREN): 7189 aliases = self.expression( 7190 exp.Aliases, 7191 comments=comments, 7192 this=this, 7193 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7194 ) 7195 self._match_r_paren(aliases) 7196 return aliases 7197 7198 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7199 self.STRING_ALIASES and self._parse_string_as_identifier() 7200 ) 7201 7202 if alias: 7203 comments.extend(alias.pop_comments()) 7204 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7205 column = this.this 7206 7207 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7208 if not this.comments and column and column.comments: 7209 this.comments = column.pop_comments() 7210 7211 return this 7212 7213 def _parse_id_var( 7214 self, 7215 any_token: bool = True, 7216 tokens: t.Optional[t.Collection[TokenType]] = None, 7217 ) -> t.Optional[exp.Expression]: 7218 expression = self._parse_identifier() 7219 if not expression and ( 7220 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7221 ): 7222 quoted = self._prev.token_type == TokenType.STRING 7223 expression = self._identifier_expression(quoted=quoted) 7224 7225 return expression 7226 7227 def _parse_string(self) -> t.Optional[exp.Expression]: 7228 if self._match_set(self.STRING_PARSERS): 7229 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7230 return self._parse_placeholder() 7231 7232 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7233 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7234 if output: 7235 output.update_positions(self._prev) 7236 return output 7237 7238 def _parse_number(self) -> t.Optional[exp.Expression]: 7239 if self._match_set(self.NUMERIC_PARSERS): 7240 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7241 return self._parse_placeholder() 7242 7243 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7244 if self._match(TokenType.IDENTIFIER): 7245 return self._identifier_expression(quoted=True) 7246 return self._parse_placeholder() 7247 7248 def _parse_var( 7249 self, 7250 any_token: bool = False, 7251 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7252 upper: bool = False, 7253 ) -> t.Optional[exp.Expression]: 7254 if ( 7255 (any_token and self._advance_any()) 7256 or self._match(TokenType.VAR) 7257 or (self._match_set(tokens) if tokens else False) 7258 ): 7259 return self.expression( 7260 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7261 ) 7262 return self._parse_placeholder() 7263 7264 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7265 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7266 self._advance() 7267 return self._prev 7268 return None 7269 7270 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7271 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7272 7273 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7274 return self._parse_primary() or self._parse_var(any_token=True) 7275 7276 def _parse_null(self) -> t.Optional[exp.Expression]: 7277 if self._match_set(self.NULL_TOKENS): 7278 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7279 return self._parse_placeholder() 7280 7281 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7282 if self._match(TokenType.TRUE): 7283 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7284 if self._match(TokenType.FALSE): 7285 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7286 return self._parse_placeholder() 7287 7288 def _parse_star(self) -> t.Optional[exp.Expression]: 7289 if self._match(TokenType.STAR): 7290 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7291 return self._parse_placeholder() 7292 7293 def _parse_parameter(self) -> exp.Parameter: 7294 this = self._parse_identifier() or self._parse_primary_or_var() 7295 return self.expression(exp.Parameter, this=this) 7296 7297 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7298 if self._match_set(self.PLACEHOLDER_PARSERS): 7299 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7300 if placeholder: 7301 return placeholder 7302 self._advance(-1) 7303 return None 7304 7305 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7306 if not self._match_texts(keywords): 7307 return None 7308 if self._match(TokenType.L_PAREN, advance=False): 7309 return self._parse_wrapped_csv(self._parse_expression) 7310 7311 expression = self._parse_expression() 7312 return [expression] if expression else None 7313 7314 def _parse_csv( 7315 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7316 ) -> t.List[exp.Expression]: 7317 parse_result = parse_method() 7318 items = [parse_result] if parse_result is not None else [] 7319 7320 while self._match(sep): 7321 self._add_comments(parse_result) 7322 parse_result = parse_method() 7323 if parse_result is not None: 7324 items.append(parse_result) 7325 7326 return items 7327 7328 def _parse_tokens( 7329 self, parse_method: t.Callable, expressions: t.Dict 7330 ) -> t.Optional[exp.Expression]: 7331 this = parse_method() 7332 7333 while self._match_set(expressions): 7334 this = self.expression( 7335 expressions[self._prev.token_type], 7336 this=this, 7337 comments=self._prev_comments, 7338 expression=parse_method(), 7339 ) 7340 7341 return this 7342 7343 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7344 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7345 7346 def _parse_wrapped_csv( 7347 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7348 ) -> t.List[exp.Expression]: 7349 return self._parse_wrapped( 7350 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7351 ) 7352 7353 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7354 wrapped = self._match(TokenType.L_PAREN) 7355 if not wrapped and not optional: 7356 self.raise_error("Expecting (") 7357 parse_result = parse_method() 7358 if wrapped: 7359 self._match_r_paren() 7360 return parse_result 7361 7362 def _parse_expressions(self) -> t.List[exp.Expression]: 7363 return self._parse_csv(self._parse_expression) 7364 7365 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7366 return ( 7367 self._parse_set_operations( 7368 self._parse_alias(self._parse_assignment(), explicit=True) 7369 if alias 7370 else self._parse_assignment() 7371 ) 7372 or self._parse_select() 7373 ) 7374 7375 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7376 return self._parse_query_modifiers( 7377 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7378 ) 7379 7380 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7381 this = None 7382 if self._match_texts(self.TRANSACTION_KIND): 7383 this = self._prev.text 7384 7385 self._match_texts(("TRANSACTION", "WORK")) 7386 7387 modes = [] 7388 while True: 7389 mode = [] 7390 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7391 mode.append(self._prev.text) 7392 7393 if mode: 7394 modes.append(" ".join(mode)) 7395 if not self._match(TokenType.COMMA): 7396 break 7397 7398 return self.expression(exp.Transaction, this=this, modes=modes) 7399 7400 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7401 chain = None 7402 savepoint = None 7403 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7404 7405 self._match_texts(("TRANSACTION", "WORK")) 7406 7407 if self._match_text_seq("TO"): 7408 self._match_text_seq("SAVEPOINT") 7409 savepoint = self._parse_id_var() 7410 7411 if self._match(TokenType.AND): 7412 chain = not self._match_text_seq("NO") 7413 self._match_text_seq("CHAIN") 7414 7415 if is_rollback: 7416 return self.expression(exp.Rollback, savepoint=savepoint) 7417 7418 return self.expression(exp.Commit, chain=chain) 7419 7420 def _parse_refresh(self) -> exp.Refresh: 7421 self._match(TokenType.TABLE) 7422 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7423 7424 def _parse_column_def_with_exists(self): 7425 start = self._index 7426 self._match(TokenType.COLUMN) 7427 7428 exists_column = self._parse_exists(not_=True) 7429 expression = self._parse_field_def() 7430 7431 if not isinstance(expression, exp.ColumnDef): 7432 self._retreat(start) 7433 return None 7434 7435 expression.set("exists", exists_column) 7436 7437 return expression 7438 7439 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7440 if not self._prev.text.upper() == "ADD": 7441 return None 7442 7443 expression = self._parse_column_def_with_exists() 7444 if not expression: 7445 return None 7446 7447 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7448 if self._match_texts(("FIRST", "AFTER")): 7449 position = self._prev.text 7450 column_position = self.expression( 7451 exp.ColumnPosition, this=self._parse_column(), position=position 7452 ) 7453 expression.set("position", column_position) 7454 7455 return expression 7456 7457 def _parse_drop_column(self) -> 
t.Optional[exp.Drop | exp.Command]: 7458 drop = self._match(TokenType.DROP) and self._parse_drop() 7459 if drop and not isinstance(drop, exp.Command): 7460 drop.set("kind", drop.args.get("kind", "COLUMN")) 7461 return drop 7462 7463 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7464 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7465 return self.expression( 7466 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7467 ) 7468 7469 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7470 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7471 self._match_text_seq("ADD") 7472 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7473 return self.expression( 7474 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7475 ) 7476 7477 column_def = self._parse_add_column() 7478 if isinstance(column_def, exp.ColumnDef): 7479 return column_def 7480 7481 exists = self._parse_exists(not_=True) 7482 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7483 return self.expression( 7484 exp.AddPartition, 7485 exists=exists, 7486 this=self._parse_field(any_token=True), 7487 location=self._match_text_seq("LOCATION", advance=False) 7488 and self._parse_property(), 7489 ) 7490 7491 return None 7492 7493 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7494 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7495 or self._match_text_seq("COLUMNS") 7496 ): 7497 schema = self._parse_schema() 7498 7499 return ( 7500 ensure_list(schema) 7501 if schema 7502 else self._parse_csv(self._parse_column_def_with_exists) 7503 ) 7504 7505 return self._parse_csv(_parse_add_alteration) 7506 7507 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7508 if self._match_texts(self.ALTER_ALTER_PARSERS): 7509 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7510 7511 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7512 # keyword after ALTER we default to parsing this statement 7513 self._match(TokenType.COLUMN) 7514 column = self._parse_field(any_token=True) 7515 7516 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7517 return self.expression(exp.AlterColumn, this=column, drop=True) 7518 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7519 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7520 if self._match(TokenType.COMMENT): 7521 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7522 if self._match_text_seq("DROP", "NOT", "NULL"): 7523 return self.expression( 7524 exp.AlterColumn, 7525 this=column, 7526 drop=True, 7527 allow_null=True, 7528 ) 7529 if self._match_text_seq("SET", "NOT", "NULL"): 7530 return self.expression( 7531 exp.AlterColumn, 7532 this=column, 7533 allow_null=False, 7534 ) 7535 7536 if self._match_text_seq("SET", "VISIBLE"): 7537 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7538 if self._match_text_seq("SET", "INVISIBLE"): 7539 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7540 7541 self._match_text_seq("SET", "DATA") 7542 self._match_text_seq("TYPE") 7543 return self.expression( 7544 exp.AlterColumn, 7545 this=column, 7546 dtype=self._parse_types(), 7547 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7548 using=self._match(TokenType.USING) and self._parse_assignment(), 7549 ) 7550 7551 def 
_parse_alter_diststyle(self) -> exp.AlterDistStyle: 7552 if self._match_texts(("ALL", "EVEN", "AUTO")): 7553 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7554 7555 self._match_text_seq("KEY", "DISTKEY") 7556 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7557 7558 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7559 if compound: 7560 self._match_text_seq("SORTKEY") 7561 7562 if self._match(TokenType.L_PAREN, advance=False): 7563 return self.expression( 7564 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7565 ) 7566 7567 self._match_texts(("AUTO", "NONE")) 7568 return self.expression( 7569 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7570 ) 7571 7572 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7573 index = self._index - 1 7574 7575 partition_exists = self._parse_exists() 7576 if self._match(TokenType.PARTITION, advance=False): 7577 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7578 7579 self._retreat(index) 7580 return self._parse_csv(self._parse_drop_column) 7581 7582 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7583 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7584 exists = self._parse_exists() 7585 old_column = self._parse_column() 7586 to = self._match_text_seq("TO") 7587 new_column = self._parse_column() 7588 7589 if old_column is None or to is None or new_column is None: 7590 return None 7591 7592 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7593 7594 self._match_text_seq("TO") 7595 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7596 7597 def _parse_alter_table_set(self) -> exp.AlterSet: 7598 alter_set = self.expression(exp.AlterSet) 7599 7600 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7601 "TABLE", "PROPERTIES" 7602 ): 7603 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7604 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7605 alter_set.set("expressions", [self._parse_assignment()]) 7606 elif self._match_texts(("LOGGED", "UNLOGGED")): 7607 alter_set.set("option", exp.var(self._prev.text.upper())) 7608 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7609 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7610 elif self._match_text_seq("LOCATION"): 7611 alter_set.set("location", self._parse_field()) 7612 elif self._match_text_seq("ACCESS", "METHOD"): 7613 alter_set.set("access_method", self._parse_field()) 7614 elif self._match_text_seq("TABLESPACE"): 7615 alter_set.set("tablespace", self._parse_field()) 7616 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7617 alter_set.set("file_format", [self._parse_field()]) 7618 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7619 alter_set.set("file_format", self._parse_wrapped_options()) 7620 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7621 alter_set.set("copy_options", self._parse_wrapped_options()) 7622 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7623 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7624 else: 7625 if self._match_text_seq("SERDE"): 7626 alter_set.set("serde", self._parse_field()) 7627 7628 properties = self._parse_wrapped(self._parse_properties, optional=True) 7629 
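            # The optional wrapped property list parsed above (e.g. the key/value
            # pairs of a SERDE clause) is stored as a single `expressions` entry.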
alter_set.set("expressions", [properties]) 7630 7631 return alter_set 7632 7633 def _parse_alter_session(self) -> exp.AlterSession: 7634 """Parse ALTER SESSION SET/UNSET statements.""" 7635 if self._match(TokenType.SET): 7636 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7637 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7638 7639 self._match_text_seq("UNSET") 7640 expressions = self._parse_csv( 7641 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7642 ) 7643 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7644 7645 def _parse_alter(self) -> exp.Alter | exp.Command: 7646 start = self._prev 7647 7648 alter_token = self._match_set(self.ALTERABLES) and self._prev 7649 if not alter_token: 7650 return self._parse_as_command(start) 7651 7652 exists = self._parse_exists() 7653 only = self._match_text_seq("ONLY") 7654 7655 if alter_token.token_type == TokenType.SESSION: 7656 this = None 7657 check = None 7658 cluster = None 7659 else: 7660 this = self._parse_table(schema=True) 7661 check = self._match_text_seq("WITH", "CHECK") 7662 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7663 7664 if self._next: 7665 self._advance() 7666 7667 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7668 if parser: 7669 actions = ensure_list(parser(self)) 7670 not_valid = self._match_text_seq("NOT", "VALID") 7671 options = self._parse_csv(self._parse_property) 7672 7673 if not self._curr and actions: 7674 return self.expression( 7675 exp.Alter, 7676 this=this, 7677 kind=alter_token.text.upper(), 7678 exists=exists, 7679 actions=actions, 7680 only=only, 7681 options=options, 7682 cluster=cluster, 7683 not_valid=not_valid, 7684 check=check, 7685 ) 7686 7687 return self._parse_as_command(start) 7688 7689 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7690 start = self._prev 7691 # https://duckdb.org/docs/sql/statements/analyze 7692 if not self._curr: 7693 return self.expression(exp.Analyze) 7694 7695 options = [] 7696 while self._match_texts(self.ANALYZE_STYLES): 7697 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7698 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7699 else: 7700 options.append(self._prev.text.upper()) 7701 7702 this: t.Optional[exp.Expression] = None 7703 inner_expression: t.Optional[exp.Expression] = None 7704 7705 kind = self._curr and self._curr.text.upper() 7706 7707 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7708 this = self._parse_table_parts() 7709 elif self._match_text_seq("TABLES"): 7710 if self._match_set((TokenType.FROM, TokenType.IN)): 7711 kind = f"{kind} {self._prev.text.upper()}" 7712 this = self._parse_table(schema=True, is_db_reference=True) 7713 elif self._match_text_seq("DATABASE"): 7714 this = self._parse_table(schema=True, is_db_reference=True) 7715 elif self._match_text_seq("CLUSTER"): 7716 this = self._parse_table() 7717 # Try matching inner expr keywords before fallback to parse table. 
7718 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7719 kind = None 7720 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7721 else: 7722 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7723 kind = None 7724 this = self._parse_table_parts() 7725 7726 partition = self._try_parse(self._parse_partition) 7727 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7728 return self._parse_as_command(start) 7729 7730 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7731 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7732 "WITH", "ASYNC", "MODE" 7733 ): 7734 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7735 else: 7736 mode = None 7737 7738 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7739 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7740 7741 properties = self._parse_properties() 7742 return self.expression( 7743 exp.Analyze, 7744 kind=kind, 7745 this=this, 7746 mode=mode, 7747 partition=partition, 7748 properties=properties, 7749 expression=inner_expression, 7750 options=options, 7751 ) 7752 7753 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7754 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7755 this = None 7756 kind = self._prev.text.upper() 7757 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7758 expressions = [] 7759 7760 if not self._match_text_seq("STATISTICS"): 7761 self.raise_error("Expecting token STATISTICS") 7762 7763 if self._match_text_seq("NOSCAN"): 7764 this = "NOSCAN" 7765 elif self._match(TokenType.FOR): 7766 if self._match_text_seq("ALL", "COLUMNS"): 7767 this = "FOR ALL COLUMNS" 7768 if self._match_texts("COLUMNS"): 7769 this = "FOR COLUMNS" 7770 expressions = self._parse_csv(self._parse_column_reference) 7771 elif self._match_text_seq("SAMPLE"): 7772 sample = self._parse_number() 7773 expressions = [ 7774 self.expression( 7775 exp.AnalyzeSample, 7776 sample=sample, 7777 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7778 ) 7779 ] 7780 7781 return self.expression( 7782 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7783 ) 7784 7785 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7786 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7787 kind = None 7788 this = None 7789 expression: t.Optional[exp.Expression] = None 7790 if self._match_text_seq("REF", "UPDATE"): 7791 kind = "REF" 7792 this = "UPDATE" 7793 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7794 this = "UPDATE SET DANGLING TO NULL" 7795 elif self._match_text_seq("STRUCTURE"): 7796 kind = "STRUCTURE" 7797 if self._match_text_seq("CASCADE", "FAST"): 7798 this = "CASCADE FAST" 7799 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7800 ("ONLINE", "OFFLINE") 7801 ): 7802 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7803 expression = self._parse_into() 7804 7805 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7806 7807 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7808 this = self._prev.text.upper() 7809 if self._match_text_seq("COLUMNS"): 7810 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7811 return None 7812 7813 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7814 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7815 if self._match_text_seq("STATISTICS"): 7816 return self.expression(exp.AnalyzeDelete, kind=kind) 7817 return None 7818 7819 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7820 if self._match_text_seq("CHAINED", "ROWS"): 7821 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7822 return None 7823 7824 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7825 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7826 this = self._prev.text.upper() 7827 expression: t.Optional[exp.Expression] = None 7828 expressions = [] 7829 update_options = None 7830 7831 if self._match_text_seq("HISTOGRAM", "ON"): 7832 expressions = self._parse_csv(self._parse_column_reference) 7833 with_expressions = [] 7834 while self._match(TokenType.WITH): 7835 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7836 if self._match_texts(("SYNC", "ASYNC")): 7837 if self._match_text_seq("MODE", advance=False): 7838 with_expressions.append(f"{self._prev.text.upper()} MODE") 7839 self._advance() 7840 else: 7841 buckets = self._parse_number() 7842 if self._match_text_seq("BUCKETS"): 7843 with_expressions.append(f"{buckets} BUCKETS") 7844 if with_expressions: 7845 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7846 7847 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7848 TokenType.UPDATE, advance=False 7849 ): 7850 update_options = self._prev.text.upper() 7851 self._advance() 7852 elif self._match_text_seq("USING", "DATA"): 7853 expression = self.expression(exp.UsingData, this=self._parse_string()) 7854 7855 return self.expression( 7856 exp.AnalyzeHistogram, 7857 this=this, 7858 expressions=expressions, 7859 expression=expression, 7860 update_options=update_options, 7861 ) 7862 7863 def _parse_merge(self) -> exp.Merge: 7864 self._match(TokenType.INTO) 7865 target = self._parse_table() 7866 7867 if target and self._match(TokenType.ALIAS, advance=False): 7868 target.set("alias", self._parse_table_alias()) 7869 7870 self._match(TokenType.USING) 7871 using = self._parse_table() 7872 7873 self._match(TokenType.ON) 7874 on = self._parse_assignment() 7875 7876 return self.expression( 7877 exp.Merge, 7878 this=target, 7879 using=using, 7880 on=on, 7881 whens=self._parse_when_matched(), 7882 returning=self._parse_returning(), 7883 ) 7884 7885 def _parse_when_matched(self) -> exp.Whens: 7886 whens = [] 7887 7888 while self._match(TokenType.WHEN): 7889 matched = not self._match(TokenType.NOT) 7890 self._match_text_seq("MATCHED") 7891 source = ( 7892 False 7893 if self._match_text_seq("BY", "TARGET") 7894 else self._match_text_seq("BY", "SOURCE") 7895 ) 7896 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7897 7898 self._match(TokenType.THEN) 7899 7900 if self._match(TokenType.INSERT): 7901 this = self._parse_star() 7902 if this: 7903 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7904 else: 7905 then = self.expression( 7906 exp.Insert, 7907 this=exp.var("ROW") 7908 if self._match_text_seq("ROW") 7909 else self._parse_value(values=False), 7910 expression=self._match_text_seq("VALUES") and self._parse_value(), 7911 ) 7912 elif self._match(TokenType.UPDATE): 7913 expressions = self._parse_star() 7914 if expressions: 7915 then = self.expression(exp.Update, expressions=expressions) 7916 else: 7917 then = self.expression( 7918 exp.Update, 7919 
expressions=self._match(TokenType.SET) 7920 and self._parse_csv(self._parse_equality), 7921 ) 7922 elif self._match(TokenType.DELETE): 7923 then = self.expression(exp.Var, this=self._prev.text) 7924 else: 7925 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7926 7927 whens.append( 7928 self.expression( 7929 exp.When, 7930 matched=matched, 7931 source=source, 7932 condition=condition, 7933 then=then, 7934 ) 7935 ) 7936 return self.expression(exp.Whens, expressions=whens) 7937 7938 def _parse_show(self) -> t.Optional[exp.Expression]: 7939 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7940 if parser: 7941 return parser(self) 7942 return self._parse_as_command(self._prev) 7943 7944 def _parse_set_item_assignment( 7945 self, kind: t.Optional[str] = None 7946 ) -> t.Optional[exp.Expression]: 7947 index = self._index 7948 7949 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7950 return self._parse_set_transaction(global_=kind == "GLOBAL") 7951 7952 left = self._parse_primary() or self._parse_column() 7953 assignment_delimiter = self._match_texts(("=", "TO")) 7954 7955 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7956 self._retreat(index) 7957 return None 7958 7959 right = self._parse_statement() or self._parse_id_var() 7960 if isinstance(right, (exp.Column, exp.Identifier)): 7961 right = exp.var(right.name) 7962 7963 this = self.expression(exp.EQ, this=left, expression=right) 7964 return self.expression(exp.SetItem, this=this, kind=kind) 7965 7966 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7967 self._match_text_seq("TRANSACTION") 7968 characteristics = self._parse_csv( 7969 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7970 ) 7971 return self.expression( 7972 exp.SetItem, 7973 expressions=characteristics, 7974 kind="TRANSACTION", 7975 **{"global": global_}, # type: ignore 7976 ) 7977 7978 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7979 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7980 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7981 7982 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7983 index = self._index 7984 set_ = self.expression( 7985 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7986 ) 7987 7988 if self._curr: 7989 self._retreat(index) 7990 return self._parse_as_command(self._prev) 7991 7992 return set_ 7993 7994 def _parse_var_from_options( 7995 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7996 ) -> t.Optional[exp.Var]: 7997 start = self._curr 7998 if not start: 7999 return None 8000 8001 option = start.text.upper() 8002 continuations = options.get(option) 8003 8004 index = self._index 8005 self._advance() 8006 for keywords in continuations or []: 8007 if isinstance(keywords, str): 8008 keywords = (keywords,) 8009 8010 if self._match_text_seq(*keywords): 8011 option = f"{option} {' '.join(keywords)}" 8012 break 8013 else: 8014 if continuations or continuations is None: 8015 if raise_unmatched: 8016 self.raise_error(f"Unknown option {option}") 8017 8018 self._retreat(index) 8019 return None 8020 8021 return exp.var(option) 8022 8023 def _parse_as_command(self, start: Token) -> exp.Command: 8024 while self._curr: 8025 self._advance() 8026 text = self._find_sql(start, self._prev) 8027 size = len(start.text) 8028 self._warn_unsupported() 8029 return exp.Command(this=text[:size], 
expression=text[size:]) 8030 8031 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8032 settings = [] 8033 8034 self._match_l_paren() 8035 kind = self._parse_id_var() 8036 8037 if self._match(TokenType.L_PAREN): 8038 while True: 8039 key = self._parse_id_var() 8040 value = self._parse_primary() 8041 if not key and value is None: 8042 break 8043 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8044 self._match(TokenType.R_PAREN) 8045 8046 self._match_r_paren() 8047 8048 return self.expression( 8049 exp.DictProperty, 8050 this=this, 8051 kind=kind.this if kind else None, 8052 settings=settings, 8053 ) 8054 8055 def _parse_dict_range(self, this: str) -> exp.DictRange: 8056 self._match_l_paren() 8057 has_min = self._match_text_seq("MIN") 8058 if has_min: 8059 min = self._parse_var() or self._parse_primary() 8060 self._match_text_seq("MAX") 8061 max = self._parse_var() or self._parse_primary() 8062 else: 8063 max = self._parse_var() or self._parse_primary() 8064 min = exp.Literal.number(0) 8065 self._match_r_paren() 8066 return self.expression(exp.DictRange, this=this, min=min, max=max) 8067 8068 def _parse_comprehension( 8069 self, this: t.Optional[exp.Expression] 8070 ) -> t.Optional[exp.Comprehension]: 8071 index = self._index 8072 expression = self._parse_column() 8073 if not self._match(TokenType.IN): 8074 self._retreat(index - 1) 8075 return None 8076 iterator = self._parse_column() 8077 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8078 return self.expression( 8079 exp.Comprehension, 8080 this=this, 8081 expression=expression, 8082 iterator=iterator, 8083 condition=condition, 8084 ) 8085 8086 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8087 if self._match(TokenType.HEREDOC_STRING): 8088 return self.expression(exp.Heredoc, this=self._prev.text) 8089 8090 if not self._match_text_seq("$"): 8091 return None 8092 8093 tags = ["$"] 8094 tag_text = None 8095 8096 if self._is_connected(): 8097 self._advance() 8098 tags.append(self._prev.text.upper()) 8099 else: 8100 self.raise_error("No closing $ found") 8101 8102 if tags[-1] != "$": 8103 if self._is_connected() and self._match_text_seq("$"): 8104 tag_text = tags[-1] 8105 tags.append("$") 8106 else: 8107 self.raise_error("No closing $ found") 8108 8109 heredoc_start = self._curr 8110 8111 while self._curr: 8112 if self._match_text_seq(*tags, advance=False): 8113 this = self._find_sql(heredoc_start, self._prev) 8114 self._advance(len(tags)) 8115 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8116 8117 self._advance() 8118 8119 self.raise_error(f"No closing {''.join(tags)} found") 8120 return None 8121 8122 def _find_parser( 8123 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8124 ) -> t.Optional[t.Callable]: 8125 if not self._curr: 8126 return None 8127 8128 index = self._index 8129 this = [] 8130 while True: 8131 # The current token might be multiple words 8132 curr = self._curr.text.upper() 8133 key = curr.split(" ") 8134 this.append(curr) 8135 8136 self._advance() 8137 result, trie = in_trie(trie, key) 8138 if result == TrieResult.FAILED: 8139 break 8140 8141 if result == TrieResult.EXISTS: 8142 subparser = parsers[" ".join(this)] 8143 return subparser 8144 8145 self._retreat(index) 8146 return None 8147 8148 def _match(self, token_type, advance=True, expression=None): 8149 if not self._curr: 8150 return None 8151 8152 if self._curr.token_type == token_type: 8153 if advance: 8154 self._advance() 8155 self._add_comments(expression) 8156 return 
True 8157 8158 return None 8159 8160 def _match_set(self, types, advance=True): 8161 if not self._curr: 8162 return None 8163 8164 if self._curr.token_type in types: 8165 if advance: 8166 self._advance() 8167 return True 8168 8169 return None 8170 8171 def _match_pair(self, token_type_a, token_type_b, advance=True): 8172 if not self._curr or not self._next: 8173 return None 8174 8175 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8176 if advance: 8177 self._advance(2) 8178 return True 8179 8180 return None 8181 8182 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8183 if not self._match(TokenType.L_PAREN, expression=expression): 8184 self.raise_error("Expecting (") 8185 8186 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8187 if not self._match(TokenType.R_PAREN, expression=expression): 8188 self.raise_error("Expecting )") 8189 8190 def _match_texts(self, texts, advance=True): 8191 if ( 8192 self._curr 8193 and self._curr.token_type != TokenType.STRING 8194 and self._curr.text.upper() in texts 8195 ): 8196 if advance: 8197 self._advance() 8198 return True 8199 return None 8200 8201 def _match_text_seq(self, *texts, advance=True): 8202 index = self._index 8203 for text in texts: 8204 if ( 8205 self._curr 8206 and self._curr.token_type != TokenType.STRING 8207 and self._curr.text.upper() == text 8208 ): 8209 self._advance() 8210 else: 8211 self._retreat(index) 8212 return None 8213 8214 if not advance: 8215 self._retreat(index) 8216 8217 return True 8218 8219 def _replace_lambda( 8220 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8221 ) -> t.Optional[exp.Expression]: 8222 if not node: 8223 return node 8224 8225 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8226 8227 for column in node.find_all(exp.Column): 8228 typ = lambda_types.get(column.parts[0].name) 8229 if typ is not None: 8230 dot_or_id = column.to_dot() if column.table else column.this 8231 8232 if typ: 8233 dot_or_id = self.expression( 8234 exp.Cast, 8235 this=dot_or_id, 8236 to=typ, 8237 ) 8238 8239 parent = column.parent 8240 8241 while isinstance(parent, exp.Dot): 8242 if not isinstance(parent.parent, exp.Dot): 8243 parent.replace(dot_or_id) 8244 break 8245 parent = parent.parent 8246 else: 8247 if column is node: 8248 node = dot_or_id 8249 else: 8250 column.replace(dot_or_id) 8251 return node 8252 8253 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8254 start = self._prev 8255 8256 # Not to be confused with TRUNCATE(number, decimals) function call 8257 if self._match(TokenType.L_PAREN): 8258 self._retreat(self._index - 2) 8259 return self._parse_function() 8260 8261 # Clickhouse supports TRUNCATE DATABASE as well 8262 is_database = self._match(TokenType.DATABASE) 8263 8264 self._match(TokenType.TABLE) 8265 8266 exists = self._parse_exists(not_=False) 8267 8268 expressions = self._parse_csv( 8269 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8270 ) 8271 8272 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8273 8274 if self._match_text_seq("RESTART", "IDENTITY"): 8275 identity = "RESTART" 8276 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8277 identity = "CONTINUE" 8278 else: 8279 identity = None 8280 8281 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8282 option = self._prev.text 8283 else: 8284 option = None 8285 8286 partition = self._parse_partition() 
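        # At this point a statement such as
        #   TRUNCATE TABLE IF EXISTS t1, t2 ON CLUSTER c RESTART IDENTITY CASCADE
        # has been fully consumed; any leftover tokens mean the statement was not
        # understood, so it is preserved verbatim as a generic exp.Command below.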
8287 8288 # Fallback case 8289 if self._curr: 8290 return self._parse_as_command(start) 8291 8292 return self.expression( 8293 exp.TruncateTable, 8294 expressions=expressions, 8295 is_database=is_database, 8296 exists=exists, 8297 cluster=cluster, 8298 identity=identity, 8299 option=option, 8300 partition=partition, 8301 ) 8302 8303 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8304 this = self._parse_ordered(self._parse_opclass) 8305 8306 if not self._match(TokenType.WITH): 8307 return this 8308 8309 op = self._parse_var(any_token=True) 8310 8311 return self.expression(exp.WithOperator, this=this, op=op) 8312 8313 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8314 self._match(TokenType.EQ) 8315 self._match(TokenType.L_PAREN) 8316 8317 opts: t.List[t.Optional[exp.Expression]] = [] 8318 option: exp.Expression | None 8319 while self._curr and not self._match(TokenType.R_PAREN): 8320 if self._match_text_seq("FORMAT_NAME", "="): 8321 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8322 option = self._parse_format_name() 8323 else: 8324 option = self._parse_property() 8325 8326 if option is None: 8327 self.raise_error("Unable to parse option") 8328 break 8329 8330 opts.append(option) 8331 8332 return opts 8333 8334 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8335 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8336 8337 options = [] 8338 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8339 option = self._parse_var(any_token=True) 8340 prev = self._prev.text.upper() 8341 8342 # Different dialects might separate options and values by white space, "=" and "AS" 8343 self._match(TokenType.EQ) 8344 self._match(TokenType.ALIAS) 8345 8346 param = self.expression(exp.CopyParameter, this=option) 8347 8348 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8349 TokenType.L_PAREN, advance=False 8350 ): 8351 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8352 param.set("expressions", self._parse_wrapped_options()) 8353 elif prev == "FILE_FORMAT": 8354 # T-SQL's external file format case 8355 param.set("expression", self._parse_field()) 8356 else: 8357 param.set("expression", self._parse_unquoted_field()) 8358 8359 options.append(param) 8360 self._match(sep) 8361 8362 return options 8363 8364 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8365 expr = self.expression(exp.Credentials) 8366 8367 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8368 expr.set("storage", self._parse_field()) 8369 if self._match_text_seq("CREDENTIALS"): 8370 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8371 creds = ( 8372 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8373 ) 8374 expr.set("credentials", creds) 8375 if self._match_text_seq("ENCRYPTION"): 8376 expr.set("encryption", self._parse_wrapped_options()) 8377 if self._match_text_seq("IAM_ROLE"): 8378 expr.set("iam_role", self._parse_field()) 8379 if self._match_text_seq("REGION"): 8380 expr.set("region", self._parse_field()) 8381 8382 return expr 8383 8384 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8385 return self._parse_field() 8386 8387 def _parse_copy(self) -> exp.Copy | exp.Command: 8388 start = self._prev 8389 8390 self._match(TokenType.INTO) 8391 8392 this = ( 8393 self._parse_select(nested=True, parse_subquery_alias=False) 8394 if self._match(TokenType.L_PAREN, advance=False) 8395 else self._parse_table(schema=True) 
8396 ) 8397 8398 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8399 8400 files = self._parse_csv(self._parse_file_location) 8401 if self._match(TokenType.EQ, advance=False): 8402 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8403 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8404 # list via `_parse_wrapped(..)` below. 8405 self._advance(-1) 8406 files = [] 8407 8408 credentials = self._parse_credentials() 8409 8410 self._match_text_seq("WITH") 8411 8412 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8413 8414 # Fallback case 8415 if self._curr: 8416 return self._parse_as_command(start) 8417 8418 return self.expression( 8419 exp.Copy, 8420 this=this, 8421 kind=kind, 8422 credentials=credentials, 8423 files=files, 8424 params=params, 8425 ) 8426 8427 def _parse_normalize(self) -> exp.Normalize: 8428 return self.expression( 8429 exp.Normalize, 8430 this=self._parse_bitwise(), 8431 form=self._match(TokenType.COMMA) and self._parse_var(), 8432 ) 8433 8434 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8435 args = self._parse_csv(lambda: self._parse_lambda()) 8436 8437 this = seq_get(args, 0) 8438 decimals = seq_get(args, 1) 8439 8440 return expr_type( 8441 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8442 ) 8443 8444 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8445 star_token = self._prev 8446 8447 if self._match_text_seq("COLUMNS", "(", advance=False): 8448 this = self._parse_function() 8449 if isinstance(this, exp.Columns): 8450 this.set("unpack", True) 8451 return this 8452 8453 return self.expression( 8454 exp.Star, 8455 **{ # type: ignore 8456 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 8457 "replace": self._parse_star_op("REPLACE"), 8458 "rename": self._parse_star_op("RENAME"), 8459 }, 8460 ).update_positions(star_token) 8461 8462 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8463 privilege_parts = [] 8464 8465 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8466 # (end of privilege list) or L_PAREN (start of column list) are met 8467 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8468 privilege_parts.append(self._curr.text.upper()) 8469 self._advance() 8470 8471 this = exp.var(" ".join(privilege_parts)) 8472 expressions = ( 8473 self._parse_wrapped_csv(self._parse_column) 8474 if self._match(TokenType.L_PAREN, advance=False) 8475 else None 8476 ) 8477 8478 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8479 8480 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8481 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8482 principal = self._parse_id_var() 8483 8484 if not principal: 8485 return None 8486 8487 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8488 8489 def _parse_grant_revoke_common( 8490 self, 8491 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8492 privileges = self._parse_csv(self._parse_grant_privilege) 8493 8494 self._match(TokenType.ON) 8495 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8496 8497 # Attempt to parse the securable e.g. 
MySQL allows names 8498 # such as "foo.*", "*.*" which are not easily parseable yet 8499 securable = self._try_parse(self._parse_table_parts) 8500 8501 return privileges, kind, securable 8502 8503 def _parse_grant(self) -> exp.Grant | exp.Command: 8504 start = self._prev 8505 8506 privileges, kind, securable = self._parse_grant_revoke_common() 8507 8508 if not securable or not self._match_text_seq("TO"): 8509 return self._parse_as_command(start) 8510 8511 principals = self._parse_csv(self._parse_grant_principal) 8512 8513 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8514 8515 if self._curr: 8516 return self._parse_as_command(start) 8517 8518 return self.expression( 8519 exp.Grant, 8520 privileges=privileges, 8521 kind=kind, 8522 securable=securable, 8523 principals=principals, 8524 grant_option=grant_option, 8525 ) 8526 8527 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8528 start = self._prev 8529 8530 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8531 8532 privileges, kind, securable = self._parse_grant_revoke_common() 8533 8534 if not securable or not self._match_text_seq("FROM"): 8535 return self._parse_as_command(start) 8536 8537 principals = self._parse_csv(self._parse_grant_principal) 8538 8539 cascade = None 8540 if self._match_texts(("CASCADE", "RESTRICT")): 8541 cascade = self._prev.text.upper() 8542 8543 if self._curr: 8544 return self._parse_as_command(start) 8545 8546 return self.expression( 8547 exp.Revoke, 8548 privileges=privileges, 8549 kind=kind, 8550 securable=securable, 8551 principals=principals, 8552 grant_option=grant_option, 8553 cascade=cascade, 8554 ) 8555 8556 def _parse_overlay(self) -> exp.Overlay: 8557 return self.expression( 8558 exp.Overlay, 8559 **{ # type: ignore 8560 "this": self._parse_bitwise(), 8561 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8562 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8563 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8564 }, 8565 ) 8566 8567 def _parse_format_name(self) -> exp.Property: 8568 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8569 # for FILE_FORMAT = <format_name> 8570 return self.expression( 8571 exp.Property, 8572 this=exp.var("FORMAT_NAME"), 8573 value=self._parse_string() or self._parse_table_parts(), 8574 ) 8575 8576 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8577 args: t.List[exp.Expression] = [] 8578 8579 if self._match(TokenType.DISTINCT): 8580 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8581 self._match(TokenType.COMMA) 8582 8583 args.extend(self._parse_csv(self._parse_assignment)) 8584 8585 return self.expression( 8586 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8587 ) 8588 8589 def _identifier_expression( 8590 self, token: t.Optional[Token] = None, **kwargs: t.Any 8591 ) -> exp.Identifier: 8592 token = token or self._prev 8593 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8594 expression.update_positions(token) 8595 return expression 8596 8597 def _build_pipe_cte( 8598 self, 8599 query: exp.Query, 8600 expressions: t.List[exp.Expression], 8601 alias_cte: t.Optional[exp.TableAlias] = None, 8602 ) -> exp.Select: 8603 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8604 if alias_cte: 8605 new_cte = alias_cte 8606 else: 8607 self._pipe_cte_counter += 1 8608 new_cte = f"__tmp{self._pipe_cte_counter}" 8609 8610 with_ = 
query.args.get("with") 8611 ctes = with_.pop() if with_ else None 8612 8613 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8614 if ctes: 8615 new_select.set("with", ctes) 8616 8617 return new_select.with_(new_cte, as_=query, copy=False) 8618 8619 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8620 select = self._parse_select(consume_pipe=False) 8621 if not select: 8622 return query 8623 8624 return self._build_pipe_cte( 8625 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8626 ) 8627 8628 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8629 limit = self._parse_limit() 8630 offset = self._parse_offset() 8631 if limit: 8632 curr_limit = query.args.get("limit", limit) 8633 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8634 query.limit(limit, copy=False) 8635 if offset: 8636 curr_offset = query.args.get("offset") 8637 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8638 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8639 8640 return query 8641 8642 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8643 this = self._parse_assignment() 8644 if self._match_text_seq("GROUP", "AND", advance=False): 8645 return this 8646 8647 this = self._parse_alias(this) 8648 8649 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8650 return self._parse_ordered(lambda: this) 8651 8652 return this 8653 8654 def _parse_pipe_syntax_aggregate_group_order_by( 8655 self, query: exp.Select, group_by_exists: bool = True 8656 ) -> exp.Select: 8657 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8658 aggregates_or_groups, orders = [], [] 8659 for element in expr: 8660 if isinstance(element, exp.Ordered): 8661 this = element.this 8662 if isinstance(this, exp.Alias): 8663 element.set("this", this.args["alias"]) 8664 orders.append(element) 8665 else: 8666 this = element 8667 aggregates_or_groups.append(this) 8668 8669 if group_by_exists: 8670 query.select(*aggregates_or_groups, copy=False).group_by( 8671 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8672 copy=False, 8673 ) 8674 else: 8675 query.select(*aggregates_or_groups, append=False, copy=False) 8676 8677 if orders: 8678 return query.order_by(*orders, append=False, copy=False) 8679 8680 return query 8681 8682 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8683 self._match_text_seq("AGGREGATE") 8684 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8685 8686 if self._match(TokenType.GROUP_BY) or ( 8687 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8688 ): 8689 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8690 8691 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8692 8693 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8694 first_setop = self.parse_set_operation(this=query) 8695 if not first_setop: 8696 return None 8697 8698 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8699 expr = self._parse_paren() 8700 return expr.assert_is(exp.Subquery).unnest() if expr else None 8701 8702 first_setop.this.pop() 8703 8704 setops = [ 8705 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8706 *self._parse_csv(_parse_and_unwrap_query), 8707 ] 8708 8709 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8710 
with_ = query.args.get("with") 8711 ctes = with_.pop() if with_ else None 8712 8713 if isinstance(first_setop, exp.Union): 8714 query = query.union(*setops, copy=False, **first_setop.args) 8715 elif isinstance(first_setop, exp.Except): 8716 query = query.except_(*setops, copy=False, **first_setop.args) 8717 else: 8718 query = query.intersect(*setops, copy=False, **first_setop.args) 8719 8720 query.set("with", ctes) 8721 8722 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8723 8724 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8725 join = self._parse_join() 8726 if not join: 8727 return None 8728 8729 if isinstance(query, exp.Select): 8730 return query.join(join, copy=False) 8731 8732 return query 8733 8734 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8735 pivots = self._parse_pivots() 8736 if not pivots: 8737 return query 8738 8739 from_ = query.args.get("from") 8740 if from_: 8741 from_.this.set("pivots", pivots) 8742 8743 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8744 8745 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8746 self._match_text_seq("EXTEND") 8747 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8748 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8749 8750 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8751 sample = self._parse_table_sample() 8752 8753 with_ = query.args.get("with") 8754 if with_: 8755 with_.expressions[-1].this.set("sample", sample) 8756 else: 8757 query.set("sample", sample) 8758 8759 return query 8760 8761 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8762 if isinstance(query, exp.Subquery): 8763 query = exp.select("*").from_(query, copy=False) 8764 8765 if not query.args.get("from"): 8766 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8767 8768 while self._match(TokenType.PIPE_GT): 8769 start = self._curr 8770 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8771 if not parser: 8772 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8773 # keywords, making it tricky to disambiguate them without lookahead. The approach 8774 # here is to try and parse a set operation and if that fails, then try to parse a 8775 # join operator. If that fails as well, then the operator is not supported. 
8776 parsed_query = self._parse_pipe_syntax_set_operator(query) 8777 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8778 if not parsed_query: 8779 self._retreat(start) 8780 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8781 break 8782 query = parsed_query 8783 else: 8784 query = parser(self, query) 8785 8786 return query 8787 8788 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8789 vars = self._parse_csv(self._parse_id_var) 8790 if not vars: 8791 return None 8792 8793 return self.expression( 8794 exp.DeclareItem, 8795 this=vars, 8796 kind=self._parse_types(), 8797 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8798 ) 8799 8800 def _parse_declare(self) -> exp.Declare | exp.Command: 8801 start = self._prev 8802 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8803 8804 if not expressions or self._curr: 8805 return self._parse_as_command(start) 8806 8807 return self.expression(exp.Declare, expressions=expressions) 8808 8809 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8810 exp_class = exp.Cast if strict else exp.TryCast 8811 8812 if exp_class == exp.TryCast: 8813 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8814 8815 return self.expression(exp_class, **kwargs) 8816 8817 def _parse_json_value(self) -> exp.JSONValue: 8818 this = self._parse_bitwise() 8819 self._match(TokenType.COMMA) 8820 path = self._parse_bitwise() 8821 8822 returning = self._match(TokenType.RETURNING) and self._parse_type() 8823 8824 return self.expression( 8825 exp.JSONValue, 8826 this=this, 8827 path=self.dialect.to_json_path(path), 8828 returning=returning, 8829 on_condition=self._parse_on_condition(), 8830 ) 8831 8832 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8833 def concat_exprs( 8834 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8835 ) -> exp.Expression: 8836 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8837 concat_exprs = [ 8838 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8839 ] 8840 node.set("expressions", concat_exprs) 8841 return node 8842 if len(exprs) == 1: 8843 return exprs[0] 8844 return self.expression(exp.Concat, expressions=args, safe=True) 8845 8846 args = self._parse_csv(self._parse_lambda) 8847 8848 if args: 8849 order = args[-1] if isinstance(args[-1], exp.Order) else None 8850 8851 if order: 8852 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8853 # remove 'expr' from exp.Order and add it back to args 8854 args[-1] = order.this 8855 order.set("this", concat_exprs(order.this, args)) 8856 8857 this = order or concat_exprs(args[0], args) 8858 else: 8859 this = None 8860 8861 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8862 8863 return self.expression(exp.GroupConcat, this=this, separator=separator)
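The statement-level helpers above (GRANT/REVOKE, COPY, DECLARE, pipe syntax) are reached through the ordinary parsing entry points. A minimal sketch of how a parsed GRANT surfaces in the public API; the statement text, dialect defaults and the fallback note are illustrative, not prescribed by this module:

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE db.tbl TO ROLE analyst")

# _parse_grant returns exp.Grant when the whole statement is understood, and falls
# back to exp.Command (preserving the raw SQL) otherwise.
print(type(stmt).__name__)  # typically: Grant
print(stmt.sql())           # round-trips the statement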
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
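A minimal sketch of build_var_map with hand-built literal arguments (the inputs are purely illustrative):

from sqlglot import exp
from sqlglot.parser import build_var_map

# Alternating key/value arguments are split into parallel key and value arrays.
node = build_var_map(
    [exp.Literal.string("a"), exp.Literal.number(1), exp.Literal.string("b"), exp.Literal.number(2)]
)
print(repr(node))  # exp.VarMap with keys ['a', 'b'] and values [1, 2]

# A single star argument (e.g. from MAP(*)) would instead produce exp.StarMap.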
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range
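Parsers produced by binary_range_parser are the callables registered in RANGE_PARSERS for predicate keywords. A hedged end-to-end example using ILIKE (the query text is illustrative):

import sqlglot
from sqlglot import exp

# The ILIKE predicate is handled via binary_range_parser(exp.ILike) and surfaces
# as an exp.ILike node in the parsed tree.
predicate = sqlglot.parse_one("SELECT * FROM t WHERE name ILIKE '%a%'").find(exp.ILike)
print(repr(predicate))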
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
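A small sketch of build_logarithm against the default dialect (the dialect choice and the expected output are assumptions for illustration):

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_logarithm

dialect = Dialect.get_or_raise(None)  # default dialect, used only for illustration

# Two arguments: base and expression, swapped when the dialect's LOG_BASE_FIRST is False.
print(build_logarithm([exp.Literal.number(2), exp.column("x")], dialect).sql())  # e.g. LOG(2, x)

# One argument: Ln or Log depending on the parser's LOG_DEFAULTS_TO_LN setting.
print(build_logarithm([exp.column("x")], dialect).sql())  # e.g. LOG(x)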
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
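A sketch of the builder returned for exp.JSONExtract; the JSON path string and the default dialect are assumptions for the example:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_extract_json_with_path

builder = build_extract_json_with_path(exp.JSONExtract)
node = builder([exp.column("doc"), exp.Literal.string("$.name")], Dialect.get_or_raise(None))
print(repr(node))  # exp.JSONExtract whose expression is the dialect's JSON path representation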
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
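A minimal sketch of build_mod showing the parenthesization of binary operands (the column and literal are illustrative):

from sqlglot import exp
from sqlglot.parser import build_mod

# MOD(a + 1, 7): the binary left operand is wrapped so operator precedence survives generation.
node = build_mod([exp.Add(this=exp.column("a"), expression=exp.Literal.number(1)), exp.Literal.number(7)])
print(node.sql())  # (a + 1) % 7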
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
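A sketch of build_array_constructor; the default dialect is assumed here, so bracket_notation is only recorded for dialects that distinguish ARRAY(...) from [...]:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

node = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    Dialect.get_or_raise(None),
)
print(node.sql())  # rendering depends on the dialect, e.g. ARRAY(1, 2) or [1, 2]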
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
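A sketch of build_convert_timezone using the two-argument form; the time zone names are illustrative:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

# With two arguments, the assumed default source time zone is injected before building
# exp.ConvertTimezone; with three, the arguments are passed through from_arg_list.
node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("ts")],
    default_source_tz="UTC",
)
print(repr(node))  # exp.ConvertTimezone(source_tz='UTC', target_tz='America/New_York', timestamp=ts)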
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 
TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *ALTERABLES, 574 *CREATABLES, 575 *SUBQUERY_PREDICATES, 576 *TYPE_TOKENS, 577 *NO_PAREN_FUNCTIONS, 578 } 579 ID_VAR_TOKENS.remove(TokenType.UNION) 580 581 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 582 TokenType.ANTI, 583 TokenType.ASOF, 584 TokenType.FULL, 585 TokenType.LEFT, 586 TokenType.LOCK, 587 TokenType.NATURAL, 588 TokenType.RIGHT, 589 TokenType.SEMI, 590 TokenType.WINDOW, 591 } 592 593 ALIAS_TOKENS = ID_VAR_TOKENS 594 595 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 596 597 ARRAY_CONSTRUCTORS = { 598 "ARRAY": exp.Array, 599 "LIST": exp.List, 600 } 601 602 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 603 604 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 605 606 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 607 608 FUNC_TOKENS = { 609 TokenType.COLLATE, 610 TokenType.COMMAND, 611 TokenType.CURRENT_DATE, 612 TokenType.CURRENT_DATETIME, 613 TokenType.CURRENT_SCHEMA, 614 TokenType.CURRENT_TIMESTAMP, 615 TokenType.CURRENT_TIME, 616 TokenType.CURRENT_USER, 617 TokenType.FILTER, 618 TokenType.FIRST, 619 TokenType.FORMAT, 620 TokenType.GET, 621 TokenType.GLOB, 622 TokenType.IDENTIFIER, 623 TokenType.INDEX, 624 TokenType.ISNULL, 625 TokenType.ILIKE, 626 TokenType.INSERT, 627 TokenType.LIKE, 628 TokenType.MERGE, 629 TokenType.NEXT, 630 TokenType.OFFSET, 631 TokenType.PRIMARY_KEY, 632 TokenType.RANGE, 633 TokenType.REPLACE, 634 TokenType.RLIKE, 635 TokenType.ROW, 636 TokenType.UNNEST, 637 TokenType.VAR, 638 TokenType.LEFT, 639 TokenType.RIGHT, 640 TokenType.SEQUENCE, 641 TokenType.DATE, 642 TokenType.DATETIME, 643 TokenType.TABLE, 644 TokenType.TIMESTAMP, 645 TokenType.TIMESTAMPTZ, 646 TokenType.TRUNCATE, 647 TokenType.UTC_DATE, 648 TokenType.UTC_TIME, 649 TokenType.UTC_TIMESTAMP, 650 TokenType.WINDOW, 651 TokenType.XOR, 652 *TYPE_TOKENS, 653 *SUBQUERY_PREDICATES, 654 } 655 656 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 657 TokenType.AND: exp.And, 658 } 659 660 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 661 TokenType.COLON_EQ: exp.PropertyEQ, 662 } 663 664 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 665 TokenType.OR: exp.Or, 666 } 667 668 EQUALITY = { 669 TokenType.EQ: exp.EQ, 670 TokenType.NEQ: exp.NEQ, 671 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 672 } 673 674 COMPARISON = { 675 TokenType.GT: exp.GT, 676 TokenType.GTE: exp.GTE, 677 TokenType.LT: exp.LT, 678 TokenType.LTE: exp.LTE, 679 } 680 681 BITWISE = { 682 TokenType.AMP: exp.BitwiseAnd, 683 TokenType.CARET: exp.BitwiseXor, 684 TokenType.PIPE: exp.BitwiseOr, 685 } 686 687 TERM = { 688 TokenType.DASH: exp.Sub, 689 TokenType.PLUS: exp.Add, 690 TokenType.MOD: exp.Mod, 691 TokenType.COLLATE: exp.Collate, 692 } 693 694 FACTOR = { 695 TokenType.DIV: exp.IntDiv, 696 TokenType.LR_ARROW: exp.Distance, 697 TokenType.SLASH: exp.Div, 698 TokenType.STAR: exp.Mul, 699 } 700 701 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 702 703 TIMES = { 704 TokenType.TIME, 705 TokenType.TIMETZ, 706 } 707 708 TIMESTAMPS = { 709 TokenType.TIMESTAMP, 710 TokenType.TIMESTAMPNTZ, 711 TokenType.TIMESTAMPTZ, 712 TokenType.TIMESTAMPLTZ, 713 *TIMES, 714 } 715 716 SET_OPERATIONS = { 717 TokenType.UNION, 718 TokenType.INTERSECT, 719 TokenType.EXCEPT, 720 } 721 722 JOIN_METHODS = { 723 TokenType.ASOF, 724 TokenType.NATURAL, 725 TokenType.POSITIONAL, 726 } 727 728 JOIN_SIDES = { 729 TokenType.LEFT, 730 TokenType.RIGHT, 731 
TokenType.FULL, 732 } 733 734 JOIN_KINDS = { 735 TokenType.ANTI, 736 TokenType.CROSS, 737 TokenType.INNER, 738 TokenType.OUTER, 739 TokenType.SEMI, 740 TokenType.STRAIGHT_JOIN, 741 } 742 743 JOIN_HINTS: t.Set[str] = set() 744 745 LAMBDAS = { 746 TokenType.ARROW: lambda self, expressions: self.expression( 747 exp.Lambda, 748 this=self._replace_lambda( 749 self._parse_assignment(), 750 expressions, 751 ), 752 expressions=expressions, 753 ), 754 TokenType.FARROW: lambda self, expressions: self.expression( 755 exp.Kwarg, 756 this=exp.var(expressions[0].name), 757 expression=self._parse_assignment(), 758 ), 759 } 760 761 COLUMN_OPERATORS = { 762 TokenType.DOT: None, 763 TokenType.DOTCOLON: lambda self, this, to: self.expression( 764 exp.JSONCast, 765 this=this, 766 to=to, 767 ), 768 TokenType.DCOLON: lambda self, this, to: self.build_cast( 769 strict=self.STRICT_CAST, this=this, to=to 770 ), 771 TokenType.ARROW: lambda self, this, path: self.expression( 772 exp.JSONExtract, 773 this=this, 774 expression=self.dialect.to_json_path(path), 775 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 776 ), 777 TokenType.DARROW: lambda self, this, path: self.expression( 778 exp.JSONExtractScalar, 779 this=this, 780 expression=self.dialect.to_json_path(path), 781 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 782 ), 783 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 784 exp.JSONBExtract, 785 this=this, 786 expression=path, 787 ), 788 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 789 exp.JSONBExtractScalar, 790 this=this, 791 expression=path, 792 ), 793 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 794 exp.JSONBContains, 795 this=this, 796 expression=key, 797 ), 798 } 799 800 CAST_COLUMN_OPERATORS = { 801 TokenType.DOTCOLON, 802 TokenType.DCOLON, 803 } 804 805 EXPRESSION_PARSERS = { 806 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 807 exp.Column: lambda self: self._parse_column(), 808 exp.Condition: lambda self: self._parse_assignment(), 809 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 810 exp.Expression: lambda self: self._parse_expression(), 811 exp.From: lambda self: self._parse_from(joins=True), 812 exp.Group: lambda self: self._parse_group(), 813 exp.Having: lambda self: self._parse_having(), 814 exp.Hint: lambda self: self._parse_hint_body(), 815 exp.Identifier: lambda self: self._parse_id_var(), 816 exp.Join: lambda self: self._parse_join(), 817 exp.Lambda: lambda self: self._parse_lambda(), 818 exp.Lateral: lambda self: self._parse_lateral(), 819 exp.Limit: lambda self: self._parse_limit(), 820 exp.Offset: lambda self: self._parse_offset(), 821 exp.Order: lambda self: self._parse_order(), 822 exp.Ordered: lambda self: self._parse_ordered(), 823 exp.Properties: lambda self: self._parse_properties(), 824 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 825 exp.Qualify: lambda self: self._parse_qualify(), 826 exp.Returning: lambda self: self._parse_returning(), 827 exp.Select: lambda self: self._parse_select(), 828 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 829 exp.Table: lambda self: self._parse_table_parts(), 830 exp.TableAlias: lambda self: self._parse_table_alias(), 831 exp.Tuple: lambda self: self._parse_value(values=False), 832 exp.Whens: lambda self: self._parse_when_matched(), 833 exp.Where: lambda self: self._parse_where(), 834 exp.Window: lambda self: self._parse_named_window(), 835 exp.With: lambda self: 
self._parse_with(), 836 "JOIN_TYPE": lambda self: self._parse_join_parts(), 837 } 838 839 STATEMENT_PARSERS = { 840 TokenType.ALTER: lambda self: self._parse_alter(), 841 TokenType.ANALYZE: lambda self: self._parse_analyze(), 842 TokenType.BEGIN: lambda self: self._parse_transaction(), 843 TokenType.CACHE: lambda self: self._parse_cache(), 844 TokenType.COMMENT: lambda self: self._parse_comment(), 845 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 846 TokenType.COPY: lambda self: self._parse_copy(), 847 TokenType.CREATE: lambda self: self._parse_create(), 848 TokenType.DELETE: lambda self: self._parse_delete(), 849 TokenType.DESC: lambda self: self._parse_describe(), 850 TokenType.DESCRIBE: lambda self: self._parse_describe(), 851 TokenType.DROP: lambda self: self._parse_drop(), 852 TokenType.GRANT: lambda self: self._parse_grant(), 853 TokenType.REVOKE: lambda self: self._parse_revoke(), 854 TokenType.INSERT: lambda self: self._parse_insert(), 855 TokenType.KILL: lambda self: self._parse_kill(), 856 TokenType.LOAD: lambda self: self._parse_load(), 857 TokenType.MERGE: lambda self: self._parse_merge(), 858 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 859 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 860 TokenType.REFRESH: lambda self: self._parse_refresh(), 861 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 862 TokenType.SET: lambda self: self._parse_set(), 863 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 864 TokenType.UNCACHE: lambda self: self._parse_uncache(), 865 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 866 TokenType.UPDATE: lambda self: self._parse_update(), 867 TokenType.USE: lambda self: self._parse_use(), 868 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 869 } 870 871 UNARY_PARSERS = { 872 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 873 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 874 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 875 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 876 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 877 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 878 } 879 880 STRING_PARSERS = { 881 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 882 exp.RawString, this=token.text 883 ), 884 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 885 exp.National, this=token.text 886 ), 887 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 888 TokenType.STRING: lambda self, token: self.expression( 889 exp.Literal, this=token.text, is_string=True 890 ), 891 TokenType.UNICODE_STRING: lambda self, token: self.expression( 892 exp.UnicodeString, 893 this=token.text, 894 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 895 ), 896 } 897 898 NUMERIC_PARSERS = { 899 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 900 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 901 TokenType.HEX_STRING: lambda self, token: self.expression( 902 exp.HexString, 903 this=token.text, 904 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 905 ), 906 TokenType.NUMBER: lambda self, token: self.expression( 907 exp.Literal, 
this=token.text, is_string=False 908 ), 909 } 910 911 PRIMARY_PARSERS = { 912 **STRING_PARSERS, 913 **NUMERIC_PARSERS, 914 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 915 TokenType.NULL: lambda self, _: self.expression(exp.Null), 916 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 917 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 918 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 919 TokenType.STAR: lambda self, _: self._parse_star_ops(), 920 } 921 922 PLACEHOLDER_PARSERS = { 923 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 924 TokenType.PARAMETER: lambda self: self._parse_parameter(), 925 TokenType.COLON: lambda self: ( 926 self.expression(exp.Placeholder, this=self._prev.text) 927 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 928 else None 929 ), 930 } 931 932 RANGE_PARSERS = { 933 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 934 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 935 TokenType.GLOB: binary_range_parser(exp.Glob), 936 TokenType.ILIKE: binary_range_parser(exp.ILike), 937 TokenType.IN: lambda self, this: self._parse_in(this), 938 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 939 TokenType.IS: lambda self, this: self._parse_is(this), 940 TokenType.LIKE: binary_range_parser(exp.Like), 941 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 942 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 943 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 944 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 945 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 946 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 947 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 948 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 949 } 950 951 PIPE_SYNTAX_TRANSFORM_PARSERS = { 952 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 953 "AS": lambda self, query: self._build_pipe_cte( 954 query, [exp.Star()], self._parse_table_alias() 955 ), 956 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 957 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 958 "ORDER BY": lambda self, query: query.order_by( 959 self._parse_order(), append=False, copy=False 960 ), 961 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 962 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 963 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 964 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 965 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 966 } 967 968 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 969 "ALLOWED_VALUES": lambda self: self.expression( 970 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 971 ), 972 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 973 "AUTO": lambda self: self._parse_auto_property(), 974 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 975 "BACKUP": lambda self: self.expression( 976 exp.BackupProperty, this=self._parse_var(any_token=True) 977 ), 978 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 979 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 980 "CHARACTER SET": lambda self, 
**kwargs: self._parse_character_set(**kwargs), 981 "CHECKSUM": lambda self: self._parse_checksum(), 982 "CLUSTER BY": lambda self: self._parse_cluster(), 983 "CLUSTERED": lambda self: self._parse_clustered_by(), 984 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 985 exp.CollateProperty, **kwargs 986 ), 987 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 988 "CONTAINS": lambda self: self._parse_contains_property(), 989 "COPY": lambda self: self._parse_copy_property(), 990 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 991 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 992 "DEFINER": lambda self: self._parse_definer(), 993 "DETERMINISTIC": lambda self: self.expression( 994 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 995 ), 996 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 997 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 998 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 999 "DISTKEY": lambda self: self._parse_distkey(), 1000 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1001 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1002 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1003 "ENVIRONMENT": lambda self: self.expression( 1004 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1005 ), 1006 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1007 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1008 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1009 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1010 "FREESPACE": lambda self: self._parse_freespace(), 1011 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1012 "HEAP": lambda self: self.expression(exp.HeapProperty), 1013 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1014 "IMMUTABLE": lambda self: self.expression( 1015 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1016 ), 1017 "INHERITS": lambda self: self.expression( 1018 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1019 ), 1020 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1021 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1022 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1023 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1024 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1025 "LIKE": lambda self: self._parse_create_like(), 1026 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1027 "LOCK": lambda self: self._parse_locking(), 1028 "LOCKING": lambda self: self._parse_locking(), 1029 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1030 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1031 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1032 "MODIFIES": lambda self: self._parse_modifies_property(), 1033 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1034 "NO": lambda self: self._parse_no_property(), 1035 "ON": lambda self: self._parse_on_property(), 1036 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1037 "OUTPUT": lambda self: 
self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1038 "PARTITION": lambda self: self._parse_partitioned_of(), 1039 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1040 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1041 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1042 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1043 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1044 "READS": lambda self: self._parse_reads_property(), 1045 "REMOTE": lambda self: self._parse_remote_with_connection(), 1046 "RETURNS": lambda self: self._parse_returns(), 1047 "STRICT": lambda self: self.expression(exp.StrictProperty), 1048 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1049 "ROW": lambda self: self._parse_row(), 1050 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1051 "SAMPLE": lambda self: self.expression( 1052 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1053 ), 1054 "SECURE": lambda self: self.expression(exp.SecureProperty), 1055 "SECURITY": lambda self: self._parse_security(), 1056 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1057 "SETTINGS": lambda self: self._parse_settings_property(), 1058 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1059 "SORTKEY": lambda self: self._parse_sortkey(), 1060 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1061 "STABLE": lambda self: self.expression( 1062 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1063 ), 1064 "STORED": lambda self: self._parse_stored(), 1065 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1066 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1067 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1068 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1069 "TO": lambda self: self._parse_to_table(), 1070 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1071 "TRANSFORM": lambda self: self.expression( 1072 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1073 ), 1074 "TTL": lambda self: self._parse_ttl(), 1075 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1076 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1077 "VOLATILE": lambda self: self._parse_volatile_property(), 1078 "WITH": lambda self: self._parse_with_property(), 1079 } 1080 1081 CONSTRAINT_PARSERS = { 1082 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1083 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1084 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1085 "CHARACTER SET": lambda self: self.expression( 1086 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1087 ), 1088 "CHECK": lambda self: self.expression( 1089 exp.CheckColumnConstraint, 1090 this=self._parse_wrapped(self._parse_assignment), 1091 enforced=self._match_text_seq("ENFORCED"), 1092 ), 1093 "COLLATE": lambda self: self.expression( 1094 exp.CollateColumnConstraint, 1095 this=self._parse_identifier() or self._parse_column(), 1096 ), 1097 "COMMENT": lambda self: self.expression( 1098 exp.CommentColumnConstraint, this=self._parse_string() 1099 ), 1100 "COMPRESS": lambda self: self._parse_compress(), 1101 "CLUSTERED": lambda self: self.expression( 1102 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 1103 ), 1104 "NONCLUSTERED": lambda self: self.expression( 1105 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1106 ), 1107 "DEFAULT": lambda self: self.expression( 1108 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1111 "EPHEMERAL": lambda self: self.expression( 1112 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1113 ), 1114 "EXCLUDE": lambda self: self.expression( 1115 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1116 ), 1117 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1118 "FORMAT": lambda self: self.expression( 1119 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1120 ), 1121 "GENERATED": lambda self: self._parse_generated_as_identity(), 1122 "IDENTITY": lambda self: self._parse_auto_increment(), 1123 "INLINE": lambda self: self._parse_inline(), 1124 "LIKE": lambda self: self._parse_create_like(), 1125 "NOT": lambda self: self._parse_not_constraint(), 1126 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1127 "ON": lambda self: ( 1128 self._match(TokenType.UPDATE) 1129 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1130 ) 1131 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1132 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1133 "PERIOD": lambda self: self._parse_period_for_system_time(), 1134 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1135 "REFERENCES": lambda self: self._parse_references(match=False), 1136 "TITLE": lambda self: self.expression( 1137 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1138 ), 1139 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1140 "UNIQUE": lambda self: self._parse_unique(), 1141 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1142 "WATERMARK": lambda self: self.expression( 1143 exp.WatermarkColumnConstraint, 1144 this=self._match(TokenType.FOR) and self._parse_column(), 1145 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1146 ), 1147 "WITH": lambda self: self.expression( 1148 exp.Properties, expressions=self._parse_wrapped_properties() 1149 ), 1150 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1151 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1152 } 1153 1154 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1155 if not self._match(TokenType.L_PAREN, advance=False): 1156 # Partitioning by bucket or truncate follows the syntax: 1157 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1158 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1159 self._retreat(self._index - 1) 1160 return None 1161 1162 klass = ( 1163 exp.PartitionedByBucket 1164 if self._prev.text.upper() == "BUCKET" 1165 else exp.PartitionByTruncate 1166 ) 1167 1168 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1169 this, expression = seq_get(args, 0), seq_get(args, 1) 1170 1171 if isinstance(this, exp.Literal): 1172 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1173 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1174 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1175 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1176 # 1177 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1178 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1179 this, expression = expression, this 1180 1181 return self.expression(klass, this=this, expression=expression) 1182 1183 ALTER_PARSERS = { 1184 "ADD": lambda self: self._parse_alter_table_add(), 1185 "AS": lambda self: self._parse_select(), 1186 "ALTER": lambda self: self._parse_alter_table_alter(), 1187 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1188 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1189 "DROP": lambda self: self._parse_alter_table_drop(), 1190 "RENAME": lambda self: self._parse_alter_table_rename(), 1191 "SET": lambda self: self._parse_alter_table_set(), 1192 "SWAP": lambda self: self.expression( 1193 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1194 ), 1195 } 1196 1197 ALTER_ALTER_PARSERS = { 1198 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1199 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1200 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1201 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1202 } 1203 1204 SCHEMA_UNNAMED_CONSTRAINTS = { 1205 "CHECK", 1206 "EXCLUDE", 1207 "FOREIGN KEY", 1208 "LIKE", 1209 "PERIOD", 1210 "PRIMARY KEY", 1211 "UNIQUE", 1212 "WATERMARK", 1213 "BUCKET", 1214 "TRUNCATE", 1215 } 1216 1217 NO_PAREN_FUNCTION_PARSERS = { 1218 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1219 "CASE": lambda self: self._parse_case(), 1220 "CONNECT_BY_ROOT": lambda self: self.expression( 1221 exp.ConnectByRoot, this=self._parse_column() 1222 ), 1223 "IF": lambda self: self._parse_if(), 1224 } 1225 1226 INVALID_FUNC_NAME_TOKENS = { 1227 TokenType.IDENTIFIER, 1228 TokenType.STRING, 1229 } 1230 1231 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1232 1233 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1234 1235 FUNCTION_PARSERS = { 1236 **{ 1237 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1238 }, 1239 **{ 1240 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1241 }, 1242 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1243 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1244 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1245 "DECODE": lambda self: self._parse_decode(), 1246 "EXTRACT": lambda self: self._parse_extract(), 1247 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1248 "GAP_FILL": lambda self: self._parse_gap_fill(), 1249 "JSON_OBJECT": lambda self: self._parse_json_object(), 1250 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1251 "JSON_TABLE": lambda self: self._parse_json_table(), 1252 "MATCH": lambda self: self._parse_match_against(), 1253 "NORMALIZE": lambda self: self._parse_normalize(), 1254 "OPENJSON": lambda self: self._parse_open_json(), 1255 "OVERLAY": lambda self: self._parse_overlay(), 1256 "POSITION": lambda self: self._parse_position(), 1257 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "STRING_AGG": lambda self: self._parse_string_agg(), 1259 "SUBSTRING": lambda self: self._parse_substring(), 1260 "TRIM": lambda self: self._parse_trim(), 1261 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1262 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1263 "XMLELEMENT": lambda self: self.expression( 1264 exp.XMLElement, 1265 this=self._match_text_seq("NAME") and self._parse_id_var(), 1266 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1267 ), 1268 "XMLTABLE": lambda self: self._parse_xml_table(), 1269 } 1270 1271 QUERY_MODIFIER_PARSERS = { 1272 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1273 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1274 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1275 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1276 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1277 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1278 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1279 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1280 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1281 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1282 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1283 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1284 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1285 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1286 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1287 TokenType.CLUSTER_BY: lambda self: ( 1288 "cluster", 1289 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1290 ), 1291 TokenType.DISTRIBUTE_BY: lambda self: ( 1292 "distribute", 1293 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1294 ), 1295 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1296 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1297 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1298 } 1299 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1300 1301 SET_PARSERS = { 1302 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1303 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1304 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1305 "TRANSACTION": lambda self: self._parse_set_transaction(), 1306 } 1307 1308 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1309 1310 TYPE_LITERAL_PARSERS = { 1311 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1312 } 1313 1314 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1315 1316 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1317 1318 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1319 1320 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1321 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1322 "ISOLATION": ( 1323 ("LEVEL", "REPEATABLE", "READ"), 1324 ("LEVEL", "READ", "COMMITTED"), 1325 ("LEVEL", "READ", "UNCOMITTED"), 1326 ("LEVEL", "SERIALIZABLE"), 1327 ), 1328 "READ": ("WRITE", "ONLY"), 1329 } 1330 1331 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1332 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1333 ) 1334 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1335 1336 CREATE_SEQUENCE: OPTIONS_TYPE = { 1337 "SCALE": ("EXTEND", "NOEXTEND"), 1338 "SHARD": ("EXTEND", "NOEXTEND"), 1339 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1340 **dict.fromkeys( 1341 ( 1342 "SESSION", 1343 "GLOBAL", 1344 "KEEP", 1345 "NOKEEP", 1346 "ORDER", 1347 "NOORDER", 1348 "NOCACHE", 1349 "CYCLE", 1350 "NOCYCLE", 1351 "NOMINVALUE", 1352 "NOMAXVALUE", 1353 "NOSCALE", 1354 "NOSHARD", 1355 ), 1356 tuple(), 1357 ), 1358 } 1359 1360 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1361 1362 USABLES: OPTIONS_TYPE = dict.fromkeys( 1363 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1364 ) 1365 1366 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1367 1368 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1369 "TYPE": ("EVOLUTION",), 1370 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1371 } 1372 1373 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1374 1375 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1376 1377 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1378 "NOT": ("ENFORCED",), 1379 "MATCH": ( 1380 "FULL", 1381 "PARTIAL", 1382 "SIMPLE", 1383 ), 1384 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1385 "USING": ( 1386 "BTREE", 1387 "HASH", 1388 ), 1389 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1390 } 1391 1392 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1393 "NO": ("OTHERS",), 1394 "CURRENT": ("ROW",), 1395 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1396 } 1397 1398 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1399 1400 CLONE_KEYWORDS = {"CLONE", "COPY"} 1401 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1402 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1403 1404 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1405 1406 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1407 1408 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1409 1410 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1411 1412 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1413 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1414 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1415 1416 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1417 1418 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1419 1420 ADD_CONSTRAINT_TOKENS = { 1421 TokenType.CONSTRAINT, 1422 TokenType.FOREIGN_KEY, 1423 TokenType.INDEX, 1424 TokenType.KEY, 1425 TokenType.PRIMARY_KEY, 1426 TokenType.UNIQUE, 1427 } 1428 1429 DISTINCT_TOKENS = {TokenType.DISTINCT} 1430 1431 NULL_TOKENS = {TokenType.NULL} 1432 1433 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1434 1435 
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1436 1437 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1438 1439 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1440 1441 ODBC_DATETIME_LITERALS = { 1442 "d": exp.Date, 1443 "t": exp.Time, 1444 "ts": exp.Timestamp, 1445 } 1446 1447 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1448 1449 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1450 1451 # The style options for the DESCRIBE statement 1452 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1453 1454 # The style options for the ANALYZE statement 1455 ANALYZE_STYLES = { 1456 "BUFFER_USAGE_LIMIT", 1457 "FULL", 1458 "LOCAL", 1459 "NO_WRITE_TO_BINLOG", 1460 "SAMPLE", 1461 "SKIP_LOCKED", 1462 "VERBOSE", 1463 } 1464 1465 ANALYZE_EXPRESSION_PARSERS = { 1466 "ALL": lambda self: self._parse_analyze_columns(), 1467 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1468 "DELETE": lambda self: self._parse_analyze_delete(), 1469 "DROP": lambda self: self._parse_analyze_histogram(), 1470 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1471 "LIST": lambda self: self._parse_analyze_list(), 1472 "PREDICATE": lambda self: self._parse_analyze_columns(), 1473 "UPDATE": lambda self: self._parse_analyze_histogram(), 1474 "VALIDATE": lambda self: self._parse_analyze_validate(), 1475 } 1476 1477 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1478 1479 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1480 1481 OPERATION_MODIFIERS: t.Set[str] = set() 1482 1483 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1484 1485 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1486 1487 STRICT_CAST = True 1488 1489 PREFIXED_PIVOT_COLUMNS = False 1490 IDENTIFY_PIVOT_STRINGS = False 1491 1492 LOG_DEFAULTS_TO_LN = False 1493 1494 # Whether the table sample clause expects CSV syntax 1495 TABLESAMPLE_CSV = False 1496 1497 # The default method used for table sampling 1498 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1499 1500 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1501 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1502 1503 # Whether the TRIM function expects the characters to trim as its first argument 1504 TRIM_PATTERN_FIRST = False 1505 1506 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1507 STRING_ALIASES = False 1508 1509 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1510 MODIFIERS_ATTACHED_TO_SET_OP = True 1511 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1512 1513 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1514 NO_PAREN_IF_COMMANDS = True 1515 1516 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1517 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1518 1519 # Whether the `:` operator is used to extract a value from a VARIANT column 1520 COLON_IS_VARIANT_EXTRACT = False 1521 1522 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1523 # If this is True and '(' is not found, the keyword will be treated as an identifier 1524 VALUES_FOLLOWED_BY_PAREN = True 1525 1526 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1527 SUPPORTS_IMPLICIT_UNNEST = False 1528 1529 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1530 INTERVAL_SPANS = True 1531 1532 # Whether a PARTITION clause can follow a table reference 1533 SUPPORTS_PARTITION_SELECTION = False 1534 1535 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1536 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1537 1538 # Whether the 'AS' keyword is optional in the CTE definition syntax 1539 OPTIONAL_ALIAS_TOKEN_CTE = True 1540 1541 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1542 ALTER_RENAME_REQUIRES_COLUMN = True 1543 1544 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1545 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1546 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1547 # as BigQuery, where all joins have the same precedence. 1548 JOINS_HAVE_EQUAL_PRECEDENCE = False 1549 1550 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1551 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1552 1553 # Whether map literals support arbitrary expressions as keys. 1554 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1555 # When False, keys are typically restricted to identifiers. 1556 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1557 1558 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1559 # is true for Snowflake but not for BigQuery which can also process strings 1560 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1561 1562 # Dialects like Databricks support JOINS without join criteria 1563 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1564 ADD_JOIN_ON_TRUE = False 1565 1566 __slots__ = ( 1567 "error_level", 1568 "error_message_context", 1569 "max_errors", 1570 "dialect", 1571 "sql", 1572 "errors", 1573 "_tokens", 1574 "_index", 1575 "_curr", 1576 "_next", 1577 "_prev", 1578 "_prev_comments", 1579 "_pipe_cte_counter", 1580 ) 1581 1582 # Autofilled 1583 SHOW_TRIE: t.Dict = {} 1584 SET_TRIE: t.Dict = {} 1585 1586 def __init__( 1587 self, 1588 error_level: t.Optional[ErrorLevel] = None, 1589 error_message_context: int = 100, 1590 max_errors: int = 3, 1591 dialect: DialectType = None, 1592 ): 1593 from sqlglot.dialects import Dialect 1594 1595 self.error_level = error_level or ErrorLevel.IMMEDIATE 1596 self.error_message_context = error_message_context 1597 self.max_errors = max_errors 1598 self.dialect = Dialect.get_or_raise(dialect) 1599 self.reset() 1600 1601 def reset(self): 1602 self.sql = "" 1603 self.errors = [] 1604 self._tokens = [] 1605 self._index = 0 1606 self._curr = None 1607 self._next = None 1608 self._prev = None 1609 self._prev_comments = None 1610 self._pipe_cte_counter = 0 1611 1612 def parse( 1613 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1614 ) -> t.List[t.Optional[exp.Expression]]: 1615 """ 1616 Parses a list of tokens and returns a list of syntax trees, one tree 1617 per parsed SQL statement. 1618 1619 Args: 1620 raw_tokens: The list of tokens. 1621 sql: The original SQL string, used to produce helpful debug messages. 1622 1623 Returns: 1624 The list of the produced syntax trees. 
1625 """ 1626 return self._parse( 1627 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1628 ) 1629 1630 def parse_into( 1631 self, 1632 expression_types: exp.IntoType, 1633 raw_tokens: t.List[Token], 1634 sql: t.Optional[str] = None, 1635 ) -> t.List[t.Optional[exp.Expression]]: 1636 """ 1637 Parses a list of tokens into a given Expression type. If a collection of Expression 1638 types is given instead, this method will try to parse the token list into each one 1639 of them, stopping at the first for which the parsing succeeds. 1640 1641 Args: 1642 expression_types: The expression type(s) to try and parse the token list into. 1643 raw_tokens: The list of tokens. 1644 sql: The original SQL string, used to produce helpful debug messages. 1645 1646 Returns: 1647 The target Expression. 1648 """ 1649 errors = [] 1650 for expression_type in ensure_list(expression_types): 1651 parser = self.EXPRESSION_PARSERS.get(expression_type) 1652 if not parser: 1653 raise TypeError(f"No parser registered for {expression_type}") 1654 1655 try: 1656 return self._parse(parser, raw_tokens, sql) 1657 except ParseError as e: 1658 e.errors[0]["into_expression"] = expression_type 1659 errors.append(e) 1660 1661 raise ParseError( 1662 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1663 errors=merge_errors(errors), 1664 ) from errors[-1] 1665 1666 def _parse( 1667 self, 1668 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1669 raw_tokens: t.List[Token], 1670 sql: t.Optional[str] = None, 1671 ) -> t.List[t.Optional[exp.Expression]]: 1672 self.reset() 1673 self.sql = sql or "" 1674 1675 total = len(raw_tokens) 1676 chunks: t.List[t.List[Token]] = [[]] 1677 1678 for i, token in enumerate(raw_tokens): 1679 if token.token_type == TokenType.SEMICOLON: 1680 if token.comments: 1681 chunks.append([token]) 1682 1683 if i < total - 1: 1684 chunks.append([]) 1685 else: 1686 chunks[-1].append(token) 1687 1688 expressions = [] 1689 1690 for tokens in chunks: 1691 self._index = -1 1692 self._tokens = tokens 1693 self._advance() 1694 1695 expressions.append(parse_method(self)) 1696 1697 if self._index < len(self._tokens): 1698 self.raise_error("Invalid expression / Unexpected token") 1699 1700 self.check_errors() 1701 1702 return expressions 1703 1704 def check_errors(self) -> None: 1705 """Logs or raises any found errors, depending on the chosen error level setting.""" 1706 if self.error_level == ErrorLevel.WARN: 1707 for error in self.errors: 1708 logger.error(str(error)) 1709 elif self.error_level == ErrorLevel.RAISE and self.errors: 1710 raise ParseError( 1711 concat_messages(self.errors, self.max_errors), 1712 errors=merge_errors(self.errors), 1713 ) 1714 1715 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1716 """ 1717 Appends an error in the list of recorded errors or raises it, depending on the chosen 1718 error level setting. 1719 """ 1720 token = token or self._curr or self._prev or Token.string("") 1721 start = token.start 1722 end = token.end + 1 1723 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1724 highlight = self.sql[start:end] 1725 end_context = self.sql[end : end + self.error_message_context] 1726 1727 error = ParseError.new( 1728 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1729 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1730 description=message, 1731 line=token.line, 1732 col=token.col, 1733 start_context=start_context, 1734 highlight=highlight, 1735 end_context=end_context, 1736 ) 1737 1738 if self.error_level == ErrorLevel.IMMEDIATE: 1739 raise error 1740 1741 self.errors.append(error) 1742 1743 def expression( 1744 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1745 ) -> E: 1746 """ 1747 Creates a new, validated Expression. 1748 1749 Args: 1750 exp_class: The expression class to instantiate. 1751 comments: An optional list of comments to attach to the expression. 1752 kwargs: The arguments to set for the expression along with their respective values. 1753 1754 Returns: 1755 The target expression. 1756 """ 1757 instance = exp_class(**kwargs) 1758 instance.add_comments(comments) if comments else self._add_comments(instance) 1759 return self.validate_expression(instance) 1760 1761 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1762 if expression and self._prev_comments: 1763 expression.add_comments(self._prev_comments) 1764 self._prev_comments = None 1765 1766 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1767 """ 1768 Validates an Expression, making sure that all its mandatory arguments are set. 1769 1770 Args: 1771 expression: The expression to validate. 1772 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1773 1774 Returns: 1775 The validated expression. 1776 """ 1777 if self.error_level != ErrorLevel.IGNORE: 1778 for error_message in expression.error_messages(args): 1779 self.raise_error(error_message) 1780 1781 return expression 1782 1783 def _find_sql(self, start: Token, end: Token) -> str: 1784 return self.sql[start.start : end.end + 1] 1785 1786 def _is_connected(self) -> bool: 1787 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1788 1789 def _advance(self, times: int = 1) -> None: 1790 self._index += times 1791 self._curr = seq_get(self._tokens, self._index) 1792 self._next = seq_get(self._tokens, self._index + 1) 1793 1794 if self._index > 0: 1795 self._prev = self._tokens[self._index - 1] 1796 self._prev_comments = self._prev.comments 1797 else: 1798 self._prev = None 1799 self._prev_comments = None 1800 1801 def _retreat(self, index: int) -> None: 1802 if index != self._index: 1803 self._advance(index - self._index) 1804 1805 def _warn_unsupported(self) -> None: 1806 if len(self._tokens) <= 1: 1807 return 1808 1809 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1810 # interested in emitting a warning for the one being currently processed. 1811 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1812 1813 logger.warning( 1814 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1815 ) 1816 1817 def _parse_command(self) -> exp.Command: 1818 self._warn_unsupported() 1819 return self.expression( 1820 exp.Command, 1821 comments=self._prev_comments, 1822 this=self._prev.text.upper(), 1823 expression=self._parse_string(), 1824 ) 1825 1826 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1827 """ 1828 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1830 solve this by setting & resetting the parser state accordingly 1831 """ 1832 index = self._index 1833 error_level = self.error_level 1834 1835 self.error_level = ErrorLevel.IMMEDIATE 1836 try: 1837 this = parse_method() 1838 except ParseError: 1839 this = None 1840 finally: 1841 if not this or retreat: 1842 self._retreat(index) 1843 self.error_level = error_level 1844 1845 return this 1846 1847 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1848 start = self._prev 1849 exists = self._parse_exists() if allow_exists else None 1850 1851 self._match(TokenType.ON) 1852 1853 materialized = self._match_text_seq("MATERIALIZED") 1854 kind = self._match_set(self.CREATABLES) and self._prev 1855 if not kind: 1856 return self._parse_as_command(start) 1857 1858 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1859 this = self._parse_user_defined_function(kind=kind.token_type) 1860 elif kind.token_type == TokenType.TABLE: 1861 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1862 elif kind.token_type == TokenType.COLUMN: 1863 this = self._parse_column() 1864 else: 1865 this = self._parse_id_var() 1866 1867 self._match(TokenType.IS) 1868 1869 return self.expression( 1870 exp.Comment, 1871 this=this, 1872 kind=kind.text, 1873 expression=self._parse_string(), 1874 exists=exists, 1875 materialized=materialized, 1876 ) 1877 1878 def _parse_to_table( 1879 self, 1880 ) -> exp.ToTableProperty: 1881 table = self._parse_table_parts(schema=True) 1882 return self.expression(exp.ToTableProperty, this=table) 1883 1884 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1885 def _parse_ttl(self) -> exp.Expression: 1886 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1887 this = self._parse_bitwise() 1888 1889 if self._match_text_seq("DELETE"): 1890 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1891 if self._match_text_seq("RECOMPRESS"): 1892 return self.expression( 1893 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1894 ) 1895 if self._match_text_seq("TO", "DISK"): 1896 return self.expression( 1897 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1898 ) 1899 if self._match_text_seq("TO", "VOLUME"): 1900 return self.expression( 1901 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1902 ) 1903 1904 return this 1905 1906 expressions = self._parse_csv(_parse_ttl_action) 1907 where = self._parse_where() 1908 group = self._parse_group() 1909 1910 aggregates = None 1911 if group and self._match(TokenType.SET): 1912 aggregates = self._parse_csv(self._parse_set_item) 1913 1914 return self.expression( 1915 exp.MergeTreeTTL, 1916 expressions=expressions, 1917 where=where, 1918 group=group, 1919 aggregates=aggregates, 1920 ) 1921 1922 def _parse_statement(self) -> t.Optional[exp.Expression]: 1923 if self._curr is None: 1924 return None 1925 1926 if self._match_set(self.STATEMENT_PARSERS): 1927 comments = self._prev_comments 1928 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1929 stmt.add_comments(comments, prepend=True) 1930 return stmt 1931 1932 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1933 return self._parse_command() 1934 1935 expression = self._parse_expression() 1936 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1937 return
self._parse_query_modifiers(expression) 1938 1939 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1940 start = self._prev 1941 temporary = self._match(TokenType.TEMPORARY) 1942 materialized = self._match_text_seq("MATERIALIZED") 1943 1944 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1945 if not kind: 1946 return self._parse_as_command(start) 1947 1948 concurrently = self._match_text_seq("CONCURRENTLY") 1949 if_exists = exists or self._parse_exists() 1950 1951 if kind == "COLUMN": 1952 this = self._parse_column() 1953 else: 1954 this = self._parse_table_parts( 1955 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1956 ) 1957 1958 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1959 1960 if self._match(TokenType.L_PAREN, advance=False): 1961 expressions = self._parse_wrapped_csv(self._parse_types) 1962 else: 1963 expressions = None 1964 1965 return self.expression( 1966 exp.Drop, 1967 exists=if_exists, 1968 this=this, 1969 expressions=expressions, 1970 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1971 temporary=temporary, 1972 materialized=materialized, 1973 cascade=self._match_text_seq("CASCADE"), 1974 constraints=self._match_text_seq("CONSTRAINTS"), 1975 purge=self._match_text_seq("PURGE"), 1976 cluster=cluster, 1977 concurrently=concurrently, 1978 ) 1979 1980 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1981 return ( 1982 self._match_text_seq("IF") 1983 and (not not_ or self._match(TokenType.NOT)) 1984 and self._match(TokenType.EXISTS) 1985 ) 1986 1987 def _parse_create(self) -> exp.Create | exp.Command: 1988 # Note: this can't be None because we've matched a statement parser 1989 start = self._prev 1990 1991 replace = ( 1992 start.token_type == TokenType.REPLACE 1993 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1994 or self._match_pair(TokenType.OR, TokenType.ALTER) 1995 ) 1996 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1997 1998 unique = self._match(TokenType.UNIQUE) 1999 2000 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2001 clustered = True 2002 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2003 "COLUMNSTORE" 2004 ): 2005 clustered = False 2006 else: 2007 clustered = None 2008 2009 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2010 self._advance() 2011 2012 properties = None 2013 create_token = self._match_set(self.CREATABLES) and self._prev 2014 2015 if not create_token: 2016 # exp.Properties.Location.POST_CREATE 2017 properties = self._parse_properties() 2018 create_token = self._match_set(self.CREATABLES) and self._prev 2019 2020 if not properties or not create_token: 2021 return self._parse_as_command(start) 2022 2023 concurrently = self._match_text_seq("CONCURRENTLY") 2024 exists = self._parse_exists(not_=True) 2025 this = None 2026 expression: t.Optional[exp.Expression] = None 2027 indexes = None 2028 no_schema_binding = None 2029 begin = None 2030 end = None 2031 clone = None 2032 2033 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2034 nonlocal properties 2035 if properties and temp_props: 2036 properties.expressions.extend(temp_props.expressions) 2037 elif temp_props: 2038 properties = temp_props 2039 2040 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2041 this = self._parse_user_defined_function(kind=create_token.token_type) 2042 2043 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2044 extend_props(self._parse_properties()) 2045 2046 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2047 extend_props(self._parse_properties()) 2048 2049 if not expression: 2050 if self._match(TokenType.COMMAND): 2051 expression = self._parse_as_command(self._prev) 2052 else: 2053 begin = self._match(TokenType.BEGIN) 2054 return_ = self._match_text_seq("RETURN") 2055 2056 if self._match(TokenType.STRING, advance=False): 2057 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2058 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2059 expression = self._parse_string() 2060 extend_props(self._parse_properties()) 2061 else: 2062 expression = self._parse_user_defined_function_expression() 2063 2064 end = self._match_text_seq("END") 2065 2066 if return_: 2067 expression = self.expression(exp.Return, this=expression) 2068 elif create_token.token_type == TokenType.INDEX: 2069 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 2070 if not self._match(TokenType.ON): 2071 index = self._parse_id_var() 2072 anonymous = False 2073 else: 2074 index = None 2075 anonymous = True 2076 2077 this = self._parse_index(index=index, anonymous=anonymous) 2078 elif create_token.token_type in self.DB_CREATABLES: 2079 table_parts = self._parse_table_parts( 2080 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2081 ) 2082 2083 # exp.Properties.Location.POST_NAME 2084 self._match(TokenType.COMMA) 2085 extend_props(self._parse_properties(before=True)) 2086 2087 this = self._parse_schema(this=table_parts) 2088 2089 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2090 extend_props(self._parse_properties()) 2091 2092 has_alias = self._match(TokenType.ALIAS) 2093 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2094 # exp.Properties.Location.POST_ALIAS 2095 extend_props(self._parse_properties()) 2096 2097 if create_token.token_type == TokenType.SEQUENCE: 2098 expression = self._parse_types() 2099 props = self._parse_properties() 2100 if props: 2101 sequence_props = exp.SequenceProperties() 2102 options = [] 2103 for prop in props: 2104 if isinstance(prop, exp.SequenceProperties): 2105 for arg, value in prop.args.items(): 2106 if arg == "options": 2107 options.extend(value) 2108 else: 2109 sequence_props.set(arg, value) 2110 prop.pop() 2111 2112 if options: 2113 sequence_props.set("options", options) 2114 2115 props.append("expressions", sequence_props) 2116 extend_props(props) 2117 else: 2118 expression = self._parse_ddl_select() 2119 2120 # Some dialects also support using a table as an alias instead of a SELECT. 2121 # Here we fallback to this as an alternative. 
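# Illustrative example of the shape handled by this fallback: CREATE TABLE t1 AS t2, where the AS clause names an existing table (t1 and t2 are hypothetical names) rather than a query such as a SELECT.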
2122 if not expression and has_alias: 2123 expression = self._try_parse(self._parse_table_parts) 2124 2125 if create_token.token_type == TokenType.TABLE: 2126 # exp.Properties.Location.POST_EXPRESSION 2127 extend_props(self._parse_properties()) 2128 2129 indexes = [] 2130 while True: 2131 index = self._parse_index() 2132 2133 # exp.Properties.Location.POST_INDEX 2134 extend_props(self._parse_properties()) 2135 if not index: 2136 break 2137 else: 2138 self._match(TokenType.COMMA) 2139 indexes.append(index) 2140 elif create_token.token_type == TokenType.VIEW: 2141 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2142 no_schema_binding = True 2143 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2144 extend_props(self._parse_properties()) 2145 2146 shallow = self._match_text_seq("SHALLOW") 2147 2148 if self._match_texts(self.CLONE_KEYWORDS): 2149 copy = self._prev.text.lower() == "copy" 2150 clone = self.expression( 2151 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2152 ) 2153 2154 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2155 return self._parse_as_command(start) 2156 2157 create_kind_text = create_token.text.upper() 2158 return self.expression( 2159 exp.Create, 2160 this=this, 2161 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2162 replace=replace, 2163 refresh=refresh, 2164 unique=unique, 2165 expression=expression, 2166 exists=exists, 2167 properties=properties, 2168 indexes=indexes, 2169 no_schema_binding=no_schema_binding, 2170 begin=begin, 2171 end=end, 2172 clone=clone, 2173 concurrently=concurrently, 2174 clustered=clustered, 2175 ) 2176 2177 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2178 seq = exp.SequenceProperties() 2179 2180 options = [] 2181 index = self._index 2182 2183 while self._curr: 2184 self._match(TokenType.COMMA) 2185 if self._match_text_seq("INCREMENT"): 2186 self._match_text_seq("BY") 2187 self._match_text_seq("=") 2188 seq.set("increment", self._parse_term()) 2189 elif self._match_text_seq("MINVALUE"): 2190 seq.set("minvalue", self._parse_term()) 2191 elif self._match_text_seq("MAXVALUE"): 2192 seq.set("maxvalue", self._parse_term()) 2193 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2194 self._match_text_seq("=") 2195 seq.set("start", self._parse_term()) 2196 elif self._match_text_seq("CACHE"): 2197 # T-SQL allows empty CACHE which is initialized dynamically 2198 seq.set("cache", self._parse_number() or True) 2199 elif self._match_text_seq("OWNED", "BY"): 2200 # "OWNED BY NONE" is the default 2201 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2202 else: 2203 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2204 if opt: 2205 options.append(opt) 2206 else: 2207 break 2208 2209 seq.set("options", options if options else None) 2210 return None if self._index == index else seq 2211 2212 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2213 # only used for teradata currently 2214 self._match(TokenType.COMMA) 2215 2216 kwargs = { 2217 "no": self._match_text_seq("NO"), 2218 "dual": self._match_text_seq("DUAL"), 2219 "before": self._match_text_seq("BEFORE"), 2220 "default": self._match_text_seq("DEFAULT"), 2221 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2222 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2223 "after": self._match_text_seq("AFTER"), 2224 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2225 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2226 } 2227 2228 if self._match_texts(self.PROPERTY_PARSERS): 2229 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2230 try: 2231 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2232 except TypeError: 2233 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2234 2235 return None 2236 2237 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2238 return self._parse_wrapped_csv(self._parse_property) 2239 2240 def _parse_property(self) -> t.Optional[exp.Expression]: 2241 if self._match_texts(self.PROPERTY_PARSERS): 2242 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2243 2244 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2245 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2246 2247 if self._match_text_seq("COMPOUND", "SORTKEY"): 2248 return self._parse_sortkey(compound=True) 2249 2250 if self._match_text_seq("SQL", "SECURITY"): 2251 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2252 2253 index = self._index 2254 2255 seq_props = self._parse_sequence_properties() 2256 if seq_props: 2257 return seq_props 2258 2259 self._retreat(index) 2260 key = self._parse_column() 2261 2262 if not self._match(TokenType.EQ): 2263 self._retreat(index) 2264 return None 2265 2266 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2267 if isinstance(key, exp.Column): 2268 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2269 2270 value = self._parse_bitwise() or self._parse_var(any_token=True) 2271 2272 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2273 if isinstance(value, exp.Column): 2274 value = exp.var(value.name) 2275 2276 return self.expression(exp.Property, this=key, value=value) 2277 2278 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2279 if self._match_text_seq("BY"): 2280 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2281 2282 self._match(TokenType.ALIAS) 2283 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2284 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2285 2286 return self.expression( 2287 exp.FileFormatProperty, 2288 this=( 2289 self.expression( 2290 exp.InputOutputFormat, 2291 input_format=input_format, 2292 output_format=output_format, 2293 ) 2294 if input_format or output_format 2295 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2296 ), 2297 hive_format=True, 2298 ) 2299 2300 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2301 field = self._parse_field() 2302 if isinstance(field, exp.Identifier) and not field.quoted: 2303 field = exp.var(field) 2304 2305 return field 2306 2307 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2308 self._match(TokenType.EQ) 2309 self._match(TokenType.ALIAS) 2310 2311 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2312 2313 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2314 properties = [] 2315 while True: 2316 if before: 2317 prop = self._parse_property_before() 2318 else: 2319 prop = self._parse_property() 2320 if not prop: 2321 break 2322 for p in ensure_list(prop): 2323 properties.append(p) 
2324 2325 if properties: 2326 return self.expression(exp.Properties, expressions=properties) 2327 2328 return None 2329 2330 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2331 return self.expression( 2332 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2333 ) 2334 2335 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2336 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2337 security_specifier = self._prev.text.upper() 2338 return self.expression(exp.SecurityProperty, this=security_specifier) 2339 return None 2340 2341 def _parse_settings_property(self) -> exp.SettingsProperty: 2342 return self.expression( 2343 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2344 ) 2345 2346 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2347 if self._index >= 2: 2348 pre_volatile_token = self._tokens[self._index - 2] 2349 else: 2350 pre_volatile_token = None 2351 2352 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2353 return exp.VolatileProperty() 2354 2355 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2356 2357 def _parse_retention_period(self) -> exp.Var: 2358 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2359 number = self._parse_number() 2360 number_str = f"{number} " if number else "" 2361 unit = self._parse_var(any_token=True) 2362 return exp.var(f"{number_str}{unit}") 2363 2364 def _parse_system_versioning_property( 2365 self, with_: bool = False 2366 ) -> exp.WithSystemVersioningProperty: 2367 self._match(TokenType.EQ) 2368 prop = self.expression( 2369 exp.WithSystemVersioningProperty, 2370 **{ # type: ignore 2371 "on": True, 2372 "with": with_, 2373 }, 2374 ) 2375 2376 if self._match_text_seq("OFF"): 2377 prop.set("on", False) 2378 return prop 2379 2380 self._match(TokenType.ON) 2381 if self._match(TokenType.L_PAREN): 2382 while self._curr and not self._match(TokenType.R_PAREN): 2383 if self._match_text_seq("HISTORY_TABLE", "="): 2384 prop.set("this", self._parse_table_parts()) 2385 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2386 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2387 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2388 prop.set("retention_period", self._parse_retention_period()) 2389 2390 self._match(TokenType.COMMA) 2391 2392 return prop 2393 2394 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2395 self._match(TokenType.EQ) 2396 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2397 prop = self.expression(exp.DataDeletionProperty, on=on) 2398 2399 if self._match(TokenType.L_PAREN): 2400 while self._curr and not self._match(TokenType.R_PAREN): 2401 if self._match_text_seq("FILTER_COLUMN", "="): 2402 prop.set("filter_column", self._parse_column()) 2403 elif self._match_text_seq("RETENTION_PERIOD", "="): 2404 prop.set("retention_period", self._parse_retention_period()) 2405 2406 self._match(TokenType.COMMA) 2407 2408 return prop 2409 2410 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2411 kind = "HASH" 2412 expressions: t.Optional[t.List[exp.Expression]] = None 2413 if self._match_text_seq("BY", "HASH"): 2414 expressions = self._parse_wrapped_csv(self._parse_id_var) 2415 elif self._match_text_seq("BY", "RANDOM"): 2416 kind = "RANDOM" 2417 2418 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2419 
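# Illustrative examples of the forms accepted here: DISTRIBUTED BY HASH(user_id) BUCKETS 32, or DISTRIBUTED BY RANDOM with no BUCKETS clause, in which case the bucket count is AUTO (user_id and 32 are hypothetical values).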
buckets: t.Optional[exp.Expression] = None 2420 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2421 buckets = self._parse_number() 2422 2423 return self.expression( 2424 exp.DistributedByProperty, 2425 expressions=expressions, 2426 kind=kind, 2427 buckets=buckets, 2428 order=self._parse_order(), 2429 ) 2430 2431 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2432 self._match_text_seq("KEY") 2433 expressions = self._parse_wrapped_id_vars() 2434 return self.expression(expr_type, expressions=expressions) 2435 2436 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2437 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2438 prop = self._parse_system_versioning_property(with_=True) 2439 self._match_r_paren() 2440 return prop 2441 2442 if self._match(TokenType.L_PAREN, advance=False): 2443 return self._parse_wrapped_properties() 2444 2445 if self._match_text_seq("JOURNAL"): 2446 return self._parse_withjournaltable() 2447 2448 if self._match_texts(self.VIEW_ATTRIBUTES): 2449 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2450 2451 if self._match_text_seq("DATA"): 2452 return self._parse_withdata(no=False) 2453 elif self._match_text_seq("NO", "DATA"): 2454 return self._parse_withdata(no=True) 2455 2456 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2457 return self._parse_serde_properties(with_=True) 2458 2459 if self._match(TokenType.SCHEMA): 2460 return self.expression( 2461 exp.WithSchemaBindingProperty, 2462 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2463 ) 2464 2465 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2466 return self.expression( 2467 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2468 ) 2469 2470 if not self._next: 2471 return None 2472 2473 return self._parse_withisolatedloading() 2474 2475 def _parse_procedure_option(self) -> exp.Expression | None: 2476 if self._match_text_seq("EXECUTE", "AS"): 2477 return self.expression( 2478 exp.ExecuteAsProperty, 2479 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2480 or self._parse_string(), 2481 ) 2482 2483 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2484 2485 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2486 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2487 self._match(TokenType.EQ) 2488 2489 user = self._parse_id_var() 2490 self._match(TokenType.PARAMETER) 2491 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2492 2493 if not user or not host: 2494 return None 2495 2496 return exp.DefinerProperty(this=f"{user}@{host}") 2497 2498 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2499 self._match(TokenType.TABLE) 2500 self._match(TokenType.EQ) 2501 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2502 2503 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2504 return self.expression(exp.LogProperty, no=no) 2505 2506 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2507 return self.expression(exp.JournalProperty, **kwargs) 2508 2509 def _parse_checksum(self) -> exp.ChecksumProperty: 2510 self._match(TokenType.EQ) 2511 2512 on = None 2513 if self._match(TokenType.ON): 2514 on = True 2515 elif self._match_text_seq("OFF"): 2516 on = False 2517 2518 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2519 2520 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2521 return self.expression( 2522 exp.Cluster, 2523 expressions=( 2524 self._parse_wrapped_csv(self._parse_ordered) 2525 if wrapped 2526 else self._parse_csv(self._parse_ordered) 2527 ), 2528 ) 2529 2530 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2531 self._match_text_seq("BY") 2532 2533 self._match_l_paren() 2534 expressions = self._parse_csv(self._parse_column) 2535 self._match_r_paren() 2536 2537 if self._match_text_seq("SORTED", "BY"): 2538 self._match_l_paren() 2539 sorted_by = self._parse_csv(self._parse_ordered) 2540 self._match_r_paren() 2541 else: 2542 sorted_by = None 2543 2544 self._match(TokenType.INTO) 2545 buckets = self._parse_number() 2546 self._match_text_seq("BUCKETS") 2547 2548 return self.expression( 2549 exp.ClusteredByProperty, 2550 expressions=expressions, 2551 sorted_by=sorted_by, 2552 buckets=buckets, 2553 ) 2554 2555 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2556 if not self._match_text_seq("GRANTS"): 2557 self._retreat(self._index - 1) 2558 return None 2559 2560 return self.expression(exp.CopyGrantsProperty) 2561 2562 def _parse_freespace(self) -> exp.FreespaceProperty: 2563 self._match(TokenType.EQ) 2564 return self.expression( 2565 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2566 ) 2567 2568 def _parse_mergeblockratio( 2569 self, no: bool = False, default: bool = False 2570 ) -> exp.MergeBlockRatioProperty: 2571 if self._match(TokenType.EQ): 2572 return self.expression( 2573 exp.MergeBlockRatioProperty, 2574 this=self._parse_number(), 2575 percent=self._match(TokenType.PERCENT), 2576 ) 2577 2578 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2579 2580 def _parse_datablocksize( 2581 self, 2582 default: t.Optional[bool] = None, 2583 minimum: t.Optional[bool] = None, 2584 maximum: t.Optional[bool] = None, 2585 ) -> exp.DataBlocksizeProperty: 2586 self._match(TokenType.EQ) 2587 size = self._parse_number() 2588 2589 units = None 2590 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2591 units = self._prev.text 2592 2593 return self.expression( 2594 exp.DataBlocksizeProperty, 2595 size=size, 2596 units=units, 2597 default=default, 2598 minimum=minimum, 2599 maximum=maximum, 2600 ) 2601 2602 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2603 self._match(TokenType.EQ) 2604 always = self._match_text_seq("ALWAYS") 2605 manual = self._match_text_seq("MANUAL") 2606 never = self._match_text_seq("NEVER") 2607 default = self._match_text_seq("DEFAULT") 2608 2609 autotemp = None 2610 if self._match_text_seq("AUTOTEMP"): 2611 autotemp = self._parse_schema() 2612 2613 return self.expression( 2614 exp.BlockCompressionProperty, 2615 always=always, 2616 manual=manual, 2617 never=never, 2618 default=default, 2619 autotemp=autotemp, 2620 ) 2621 2622 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2623 index = self._index 2624 no = self._match_text_seq("NO") 2625 concurrent = self._match_text_seq("CONCURRENT") 2626 2627 if not self._match_text_seq("ISOLATED", "LOADING"): 2628 self._retreat(index) 2629 return None 2630 2631 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2632 return self.expression( 2633 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2634 ) 2635 2636 def _parse_locking(self) -> exp.LockingProperty: 2637 if self._match(TokenType.TABLE): 2638 kind = "TABLE" 2639 elif 
self._match(TokenType.VIEW): 2640 kind = "VIEW" 2641 elif self._match(TokenType.ROW): 2642 kind = "ROW" 2643 elif self._match_text_seq("DATABASE"): 2644 kind = "DATABASE" 2645 else: 2646 kind = None 2647 2648 if kind in ("DATABASE", "TABLE", "VIEW"): 2649 this = self._parse_table_parts() 2650 else: 2651 this = None 2652 2653 if self._match(TokenType.FOR): 2654 for_or_in = "FOR" 2655 elif self._match(TokenType.IN): 2656 for_or_in = "IN" 2657 else: 2658 for_or_in = None 2659 2660 if self._match_text_seq("ACCESS"): 2661 lock_type = "ACCESS" 2662 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2663 lock_type = "EXCLUSIVE" 2664 elif self._match_text_seq("SHARE"): 2665 lock_type = "SHARE" 2666 elif self._match_text_seq("READ"): 2667 lock_type = "READ" 2668 elif self._match_text_seq("WRITE"): 2669 lock_type = "WRITE" 2670 elif self._match_text_seq("CHECKSUM"): 2671 lock_type = "CHECKSUM" 2672 else: 2673 lock_type = None 2674 2675 override = self._match_text_seq("OVERRIDE") 2676 2677 return self.expression( 2678 exp.LockingProperty, 2679 this=this, 2680 kind=kind, 2681 for_or_in=for_or_in, 2682 lock_type=lock_type, 2683 override=override, 2684 ) 2685 2686 def _parse_partition_by(self) -> t.List[exp.Expression]: 2687 if self._match(TokenType.PARTITION_BY): 2688 return self._parse_csv(self._parse_assignment) 2689 return [] 2690 2691 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2692 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2693 if self._match_text_seq("MINVALUE"): 2694 return exp.var("MINVALUE") 2695 if self._match_text_seq("MAXVALUE"): 2696 return exp.var("MAXVALUE") 2697 return self._parse_bitwise() 2698 2699 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2700 expression = None 2701 from_expressions = None 2702 to_expressions = None 2703 2704 if self._match(TokenType.IN): 2705 this = self._parse_wrapped_csv(self._parse_bitwise) 2706 elif self._match(TokenType.FROM): 2707 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2708 self._match_text_seq("TO") 2709 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2710 elif self._match_text_seq("WITH", "(", "MODULUS"): 2711 this = self._parse_number() 2712 self._match_text_seq(",", "REMAINDER") 2713 expression = self._parse_number() 2714 self._match_r_paren() 2715 else: 2716 self.raise_error("Failed to parse partition bound spec.") 2717 2718 return self.expression( 2719 exp.PartitionBoundSpec, 2720 this=this, 2721 expression=expression, 2722 from_expressions=from_expressions, 2723 to_expressions=to_expressions, 2724 ) 2725 2726 # https://www.postgresql.org/docs/current/sql-createtable.html 2727 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2728 if not self._match_text_seq("OF"): 2729 self._retreat(self._index - 1) 2730 return None 2731 2732 this = self._parse_table(schema=True) 2733 2734 if self._match(TokenType.DEFAULT): 2735 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2736 elif self._match_text_seq("FOR", "VALUES"): 2737 expression = self._parse_partition_bound_spec() 2738 else: 2739 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2740 2741 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2742 2743 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2744 self._match(TokenType.EQ) 2745 return self.expression( 2746 exp.PartitionedByProperty, 2747 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2748 ) 2749 2750 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2751 if self._match_text_seq("AND", "STATISTICS"): 2752 statistics = True 2753 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2754 statistics = False 2755 else: 2756 statistics = None 2757 2758 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2759 2760 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2761 if self._match_text_seq("SQL"): 2762 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2763 return None 2764 2765 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2766 if self._match_text_seq("SQL", "DATA"): 2767 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2768 return None 2769 2770 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2771 if self._match_text_seq("PRIMARY", "INDEX"): 2772 return exp.NoPrimaryIndexProperty() 2773 if self._match_text_seq("SQL"): 2774 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2775 return None 2776 2777 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2778 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2779 return exp.OnCommitProperty() 2780 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2781 return exp.OnCommitProperty(delete=True) 2782 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2783 2784 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2785 if self._match_text_seq("SQL", "DATA"): 2786 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2787 return None 2788 2789 def _parse_distkey(self) -> exp.DistKeyProperty: 2790 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2791 2792 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2793 table = self._parse_table(schema=True) 2794 2795 options = [] 2796 while self._match_texts(("INCLUDING", "EXCLUDING")): 2797 this = self._prev.text.upper() 2798 2799 id_var = self._parse_id_var() 2800 if not id_var: 2801 return None 2802 2803 options.append( 2804 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2805 ) 2806 2807 return self.expression(exp.LikeProperty, this=table, expressions=options) 2808 2809 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2810 return self.expression( 2811 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2812 ) 2813 2814 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2815 self._match(TokenType.EQ) 2816 return self.expression( 2817 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2818 ) 2819 2820 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2821 self._match_text_seq("WITH", "CONNECTION") 2822 return self.expression( 2823 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2824 ) 2825 2826 def _parse_returns(self) -> exp.ReturnsProperty: 2827 value: t.Optional[exp.Expression] 2828 null = None 2829 is_table = self._match(TokenType.TABLE) 2830 2831 if is_table: 2832 if self._match(TokenType.LT): 2833 value = self.expression( 2834 exp.Schema, 2835 this="TABLE", 2836 expressions=self._parse_csv(self._parse_struct_types), 2837 ) 2838 if not self._match(TokenType.GT): 2839 self.raise_error("Expecting >") 2840 else: 2841 value = self._parse_schema(exp.var("TABLE")) 2842 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2843 null = True 2844 value = None 2845 else: 2846 value = self._parse_types() 2847 2848 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2849 2850 def _parse_describe(self) -> exp.Describe: 2851 kind = self._match_set(self.CREATABLES) and self._prev.text 2852 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2853 if self._match(TokenType.DOT): 2854 style = None 2855 self._retreat(self._index - 2) 2856 2857 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2858 2859 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2860 this = self._parse_statement() 2861 else: 2862 this = self._parse_table(schema=True) 2863 2864 properties = self._parse_properties() 2865 expressions = properties.expressions if properties else None 2866 partition = self._parse_partition() 2867 return self.expression( 2868 exp.Describe, 2869 this=this, 2870 style=style, 2871 kind=kind, 2872 expressions=expressions, 2873 partition=partition, 2874 format=format, 2875 ) 2876 2877 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2878 kind = self._prev.text.upper() 2879 expressions = [] 2880 2881 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2882 if self._match(TokenType.WHEN): 2883 expression = self._parse_disjunction() 2884 self._match(TokenType.THEN) 2885 else: 2886 expression = None 2887 2888 else_ = self._match(TokenType.ELSE) 2889 2890 if not self._match(TokenType.INTO): 2891 return None 2892 2893 return self.expression( 2894 exp.ConditionalInsert, 2895 this=self.expression( 2896 exp.Insert, 2897 this=self._parse_table(schema=True), 2898 expression=self._parse_derived_table_values(), 2899 ), 2900 expression=expression, 2901 else_=else_, 2902 ) 2903 2904 expression = parse_conditional_insert() 2905 while expression is not None: 2906 expressions.append(expression) 2907 expression = parse_conditional_insert() 2908 2909 return self.expression( 2910 exp.MultitableInserts, 2911 kind=kind, 2912 comments=comments, 2913 expressions=expressions, 2914 source=self._parse_table(), 2915 ) 2916 2917 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2918 comments = [] 2919 hint = self._parse_hint() 2920 overwrite = self._match(TokenType.OVERWRITE) 2921 ignore = self._match(TokenType.IGNORE) 2922 local = self._match_text_seq("LOCAL") 2923 alternative = None 2924 is_function = None 2925 2926 if self._match_text_seq("DIRECTORY"): 2927 this: t.Optional[exp.Expression] = self.expression( 2928 exp.Directory, 2929 this=self._parse_var_or_string(), 2930 local=local, 2931 row_format=self._parse_row_format(match_row=True), 2932 ) 2933 else: 2934 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2935 comments += ensure_list(self._prev_comments) 2936 return self._parse_multitable_inserts(comments) 2937 2938 if self._match(TokenType.OR): 2939 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2940 2941 self._match(TokenType.INTO) 2942 comments += ensure_list(self._prev_comments) 2943 self._match(TokenType.TABLE) 2944 is_function = self._match(TokenType.FUNCTION) 2945 2946 this = ( 2947 self._parse_table(schema=True, parse_partition=True) 2948 if not is_function 2949 else self._parse_function() 2950 ) 2951 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2952 this.set("alias", self._parse_table_alias()) 2953 2954 returning = self._parse_returning() 2955 2956 return self.expression( 2957 
exp.Insert, 2958 comments=comments, 2959 hint=hint, 2960 is_function=is_function, 2961 this=this, 2962 stored=self._match_text_seq("STORED") and self._parse_stored(), 2963 by_name=self._match_text_seq("BY", "NAME"), 2964 exists=self._parse_exists(), 2965 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2966 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2967 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2968 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2969 conflict=self._parse_on_conflict(), 2970 returning=returning or self._parse_returning(), 2971 overwrite=overwrite, 2972 alternative=alternative, 2973 ignore=ignore, 2974 source=self._match(TokenType.TABLE) and self._parse_table(), 2975 ) 2976 2977 def _parse_kill(self) -> exp.Kill: 2978 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2979 2980 return self.expression( 2981 exp.Kill, 2982 this=self._parse_primary(), 2983 kind=kind, 2984 ) 2985 2986 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2987 conflict = self._match_text_seq("ON", "CONFLICT") 2988 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2989 2990 if not conflict and not duplicate: 2991 return None 2992 2993 conflict_keys = None 2994 constraint = None 2995 2996 if conflict: 2997 if self._match_text_seq("ON", "CONSTRAINT"): 2998 constraint = self._parse_id_var() 2999 elif self._match(TokenType.L_PAREN): 3000 conflict_keys = self._parse_csv(self._parse_id_var) 3001 self._match_r_paren() 3002 3003 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3004 if self._prev.token_type == TokenType.UPDATE: 3005 self._match(TokenType.SET) 3006 expressions = self._parse_csv(self._parse_equality) 3007 else: 3008 expressions = None 3009 3010 return self.expression( 3011 exp.OnConflict, 3012 duplicate=duplicate, 3013 expressions=expressions, 3014 action=action, 3015 conflict_keys=conflict_keys, 3016 constraint=constraint, 3017 where=self._parse_where(), 3018 ) 3019 3020 def _parse_returning(self) -> t.Optional[exp.Returning]: 3021 if not self._match(TokenType.RETURNING): 3022 return None 3023 return self.expression( 3024 exp.Returning, 3025 expressions=self._parse_csv(self._parse_expression), 3026 into=self._match(TokenType.INTO) and self._parse_table_part(), 3027 ) 3028 3029 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3030 if not self._match(TokenType.FORMAT): 3031 return None 3032 return self._parse_row_format() 3033 3034 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3035 index = self._index 3036 with_ = with_ or self._match_text_seq("WITH") 3037 3038 if not self._match(TokenType.SERDE_PROPERTIES): 3039 self._retreat(index) 3040 return None 3041 return self.expression( 3042 exp.SerdeProperties, 3043 **{ # type: ignore 3044 "expressions": self._parse_wrapped_properties(), 3045 "with": with_, 3046 }, 3047 ) 3048 3049 def _parse_row_format( 3050 self, match_row: bool = False 3051 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3052 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3053 return None 3054 3055 if self._match_text_seq("SERDE"): 3056 this = self._parse_string() 3057 3058 serde_properties = self._parse_serde_properties() 3059 3060 return self.expression( 3061 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3062 ) 3063 3064 self._match_text_seq("DELIMITED") 3065 3066 kwargs = {} 3067 3068 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3069 kwargs["fields"] = self._parse_string() 3070 if self._match_text_seq("ESCAPED", "BY"): 3071 kwargs["escaped"] = self._parse_string() 3072 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3073 kwargs["collection_items"] = self._parse_string() 3074 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3075 kwargs["map_keys"] = self._parse_string() 3076 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3077 kwargs["lines"] = self._parse_string() 3078 if self._match_text_seq("NULL", "DEFINED", "AS"): 3079 kwargs["null"] = self._parse_string() 3080 3081 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3082 3083 def _parse_load(self) -> exp.LoadData | exp.Command: 3084 if self._match_text_seq("DATA"): 3085 local = self._match_text_seq("LOCAL") 3086 self._match_text_seq("INPATH") 3087 inpath = self._parse_string() 3088 overwrite = self._match(TokenType.OVERWRITE) 3089 self._match_pair(TokenType.INTO, TokenType.TABLE) 3090 3091 return self.expression( 3092 exp.LoadData, 3093 this=self._parse_table(schema=True), 3094 local=local, 3095 overwrite=overwrite, 3096 inpath=inpath, 3097 partition=self._parse_partition(), 3098 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3099 serde=self._match_text_seq("SERDE") and self._parse_string(), 3100 ) 3101 return self._parse_as_command(self._prev) 3102 3103 def _parse_delete(self) -> exp.Delete: 3104 # This handles MySQL's "Multiple-Table Syntax" 3105 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3106 tables = None 3107 if not self._match(TokenType.FROM, advance=False): 3108 tables = self._parse_csv(self._parse_table) or None 3109 3110 returning = self._parse_returning() 3111 3112 return self.expression( 3113 exp.Delete, 3114 tables=tables, 3115 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3116 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3117 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3118 where=self._parse_where(), 3119 returning=returning or self._parse_returning(), 3120 limit=self._parse_limit(), 3121 ) 3122 3123 def _parse_update(self) -> exp.Update: 3124 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3125 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3126 returning = self._parse_returning() 3127 return self.expression( 3128 exp.Update, 3129 **{ # type: ignore 3130 "this": this, 3131 "expressions": expressions, 3132 "from": self._parse_from(joins=True), 3133 "where": self._parse_where(), 3134 "returning": returning or self._parse_returning(), 3135 "order": self._parse_order(), 3136 "limit": self._parse_limit(), 3137 }, 3138 ) 3139 3140 def _parse_use(self) -> exp.Use: 3141 return self.expression( 3142 exp.Use, 3143 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3144 this=self._parse_table(schema=False), 3145 ) 3146 3147 def _parse_uncache(self) -> exp.Uncache: 3148 if not self._match(TokenType.TABLE): 3149 self.raise_error("Expecting TABLE after UNCACHE") 3150 3151 return self.expression( 3152 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3153 ) 3154 3155 def _parse_cache(self) -> exp.Cache: 3156 lazy = self._match_text_seq("LAZY") 3157 self._match(TokenType.TABLE) 3158 table = 
self._parse_table(schema=True) 3159 3160 options = [] 3161 if self._match_text_seq("OPTIONS"): 3162 self._match_l_paren() 3163 k = self._parse_string() 3164 self._match(TokenType.EQ) 3165 v = self._parse_string() 3166 options = [k, v] 3167 self._match_r_paren() 3168 3169 self._match(TokenType.ALIAS) 3170 return self.expression( 3171 exp.Cache, 3172 this=table, 3173 lazy=lazy, 3174 options=options, 3175 expression=self._parse_select(nested=True), 3176 ) 3177 3178 def _parse_partition(self) -> t.Optional[exp.Partition]: 3179 if not self._match_texts(self.PARTITION_KEYWORDS): 3180 return None 3181 3182 return self.expression( 3183 exp.Partition, 3184 subpartition=self._prev.text.upper() == "SUBPARTITION", 3185 expressions=self._parse_wrapped_csv(self._parse_assignment), 3186 ) 3187 3188 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3189 def _parse_value_expression() -> t.Optional[exp.Expression]: 3190 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3191 return exp.var(self._prev.text.upper()) 3192 return self._parse_expression() 3193 3194 if self._match(TokenType.L_PAREN): 3195 expressions = self._parse_csv(_parse_value_expression) 3196 self._match_r_paren() 3197 return self.expression(exp.Tuple, expressions=expressions) 3198 3199 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3200 expression = self._parse_expression() 3201 if expression: 3202 return self.expression(exp.Tuple, expressions=[expression]) 3203 return None 3204 3205 def _parse_projections(self) -> t.List[exp.Expression]: 3206 return self._parse_expressions() 3207 3208 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3209 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3210 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3211 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3212 ) 3213 elif self._match(TokenType.FROM): 3214 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3215 # Support parentheses for duckdb FROM-first syntax 3216 select = self._parse_select(from_=from_) 3217 if select: 3218 if not select.args.get("from"): 3219 select.set("from", from_) 3220 this = select 3221 else: 3222 this = exp.select("*").from_(t.cast(exp.From, from_)) 3223 else: 3224 this = ( 3225 self._parse_table(consume_pipe=True) 3226 if table 3227 else self._parse_select(nested=True, parse_set_operation=False) 3228 ) 3229 3230 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3231 # in case a modifier (e.g. 
join) is following 3232 if table and isinstance(this, exp.Values) and this.alias: 3233 alias = this.args["alias"].pop() 3234 this = exp.Table(this=this, alias=alias) 3235 3236 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3237 3238 return this 3239 3240 def _parse_select( 3241 self, 3242 nested: bool = False, 3243 table: bool = False, 3244 parse_subquery_alias: bool = True, 3245 parse_set_operation: bool = True, 3246 consume_pipe: bool = True, 3247 from_: t.Optional[exp.From] = None, 3248 ) -> t.Optional[exp.Expression]: 3249 query = self._parse_select_query( 3250 nested=nested, 3251 table=table, 3252 parse_subquery_alias=parse_subquery_alias, 3253 parse_set_operation=parse_set_operation, 3254 ) 3255 3256 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3257 if not query and from_: 3258 query = exp.select("*").from_(from_) 3259 if isinstance(query, exp.Query): 3260 query = self._parse_pipe_syntax_query(query) 3261 query = query.subquery(copy=False) if query and table else query 3262 3263 return query 3264 3265 def _parse_select_query( 3266 self, 3267 nested: bool = False, 3268 table: bool = False, 3269 parse_subquery_alias: bool = True, 3270 parse_set_operation: bool = True, 3271 ) -> t.Optional[exp.Expression]: 3272 cte = self._parse_with() 3273 3274 if cte: 3275 this = self._parse_statement() 3276 3277 if not this: 3278 self.raise_error("Failed to parse any statement following CTE") 3279 return cte 3280 3281 if "with" in this.arg_types: 3282 this.set("with", cte) 3283 else: 3284 self.raise_error(f"{this.key} does not support CTE") 3285 this = cte 3286 3287 return this 3288 3289 # duckdb supports leading with FROM x 3290 from_ = ( 3291 self._parse_from(consume_pipe=True) 3292 if self._match(TokenType.FROM, advance=False) 3293 else None 3294 ) 3295 3296 if self._match(TokenType.SELECT): 3297 comments = self._prev_comments 3298 3299 hint = self._parse_hint() 3300 3301 if self._next and not self._next.token_type == TokenType.DOT: 3302 all_ = self._match(TokenType.ALL) 3303 distinct = self._match_set(self.DISTINCT_TOKENS) 3304 else: 3305 all_, distinct = None, None 3306 3307 kind = ( 3308 self._match(TokenType.ALIAS) 3309 and self._match_texts(("STRUCT", "VALUE")) 3310 and self._prev.text.upper() 3311 ) 3312 3313 if distinct: 3314 distinct = self.expression( 3315 exp.Distinct, 3316 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3317 ) 3318 3319 if all_ and distinct: 3320 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3321 3322 operation_modifiers = [] 3323 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3324 operation_modifiers.append(exp.var(self._prev.text.upper())) 3325 3326 limit = self._parse_limit(top=True) 3327 projections = self._parse_projections() 3328 3329 this = self.expression( 3330 exp.Select, 3331 kind=kind, 3332 hint=hint, 3333 distinct=distinct, 3334 expressions=projections, 3335 limit=limit, 3336 operation_modifiers=operation_modifiers or None, 3337 ) 3338 this.comments = comments 3339 3340 into = self._parse_into() 3341 if into: 3342 this.set("into", into) 3343 3344 if not from_: 3345 from_ = self._parse_from() 3346 3347 if from_: 3348 this.set("from", from_) 3349 3350 this = self._parse_query_modifiers(this) 3351 elif (table or nested) and self._match(TokenType.L_PAREN): 3352 this = self._parse_wrapped_select(table=table) 3353 3354 # We return early here so that the UNION isn't attached to the subquery by the 3355 # following call to _parse_set_operations, but 
instead becomes the parent node 3356 self._match_r_paren() 3357 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3358 elif self._match(TokenType.VALUES, advance=False): 3359 this = self._parse_derived_table_values() 3360 elif from_: 3361 this = exp.select("*").from_(from_.this, copy=False) 3362 elif self._match(TokenType.SUMMARIZE): 3363 table = self._match(TokenType.TABLE) 3364 this = self._parse_select() or self._parse_string() or self._parse_table() 3365 return self.expression(exp.Summarize, this=this, table=table) 3366 elif self._match(TokenType.DESCRIBE): 3367 this = self._parse_describe() 3368 elif self._match_text_seq("STREAM"): 3369 this = self._parse_function() 3370 if this: 3371 this = self.expression(exp.Stream, this=this) 3372 else: 3373 self._retreat(self._index - 1) 3374 else: 3375 this = None 3376 3377 return self._parse_set_operations(this) if parse_set_operation else this 3378 3379 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3380 self._match_text_seq("SEARCH") 3381 3382 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3383 3384 if not kind: 3385 return None 3386 3387 self._match_text_seq("FIRST", "BY") 3388 3389 return self.expression( 3390 exp.RecursiveWithSearch, 3391 kind=kind, 3392 this=self._parse_id_var(), 3393 expression=self._match_text_seq("SET") and self._parse_id_var(), 3394 using=self._match_text_seq("USING") and self._parse_id_var(), 3395 ) 3396 3397 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3398 if not skip_with_token and not self._match(TokenType.WITH): 3399 return None 3400 3401 comments = self._prev_comments 3402 recursive = self._match(TokenType.RECURSIVE) 3403 3404 last_comments = None 3405 expressions = [] 3406 while True: 3407 cte = self._parse_cte() 3408 if isinstance(cte, exp.CTE): 3409 expressions.append(cte) 3410 if last_comments: 3411 cte.add_comments(last_comments) 3412 3413 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3414 break 3415 else: 3416 self._match(TokenType.WITH) 3417 3418 last_comments = self._prev_comments 3419 3420 return self.expression( 3421 exp.With, 3422 comments=comments, 3423 expressions=expressions, 3424 recursive=recursive, 3425 search=self._parse_recursive_with_search(), 3426 ) 3427 3428 def _parse_cte(self) -> t.Optional[exp.CTE]: 3429 index = self._index 3430 3431 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3432 if not alias or not alias.this: 3433 self.raise_error("Expected CTE to have alias") 3434 3435 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3436 self._retreat(index) 3437 return None 3438 3439 comments = self._prev_comments 3440 3441 if self._match_text_seq("NOT", "MATERIALIZED"): 3442 materialized = False 3443 elif self._match_text_seq("MATERIALIZED"): 3444 materialized = True 3445 else: 3446 materialized = None 3447 3448 cte = self.expression( 3449 exp.CTE, 3450 this=self._parse_wrapped(self._parse_statement), 3451 alias=alias, 3452 materialized=materialized, 3453 comments=comments, 3454 ) 3455 3456 values = cte.this 3457 if isinstance(values, exp.Values): 3458 if values.alias: 3459 cte.set("this", exp.select("*").from_(values)) 3460 else: 3461 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3462 3463 return cte 3464 3465 def _parse_table_alias( 3466 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3467 ) -> t.Optional[exp.TableAlias]: 3468 # In some dialects, LIMIT and OFFSET 
can act as both identifiers and keywords (clauses) 3469 # so this section tries to parse the clause version and if it fails, it treats the token 3470 # as an identifier (alias) 3471 if self._can_parse_limit_or_offset(): 3472 return None 3473 3474 any_token = self._match(TokenType.ALIAS) 3475 alias = ( 3476 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3477 or self._parse_string_as_identifier() 3478 ) 3479 3480 index = self._index 3481 if self._match(TokenType.L_PAREN): 3482 columns = self._parse_csv(self._parse_function_parameter) 3483 self._match_r_paren() if columns else self._retreat(index) 3484 else: 3485 columns = None 3486 3487 if not alias and not columns: 3488 return None 3489 3490 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3491 3492 # We bubble up comments from the Identifier to the TableAlias 3493 if isinstance(alias, exp.Identifier): 3494 table_alias.add_comments(alias.pop_comments()) 3495 3496 return table_alias 3497 3498 def _parse_subquery( 3499 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3500 ) -> t.Optional[exp.Subquery]: 3501 if not this: 3502 return None 3503 3504 return self.expression( 3505 exp.Subquery, 3506 this=this, 3507 pivots=self._parse_pivots(), 3508 alias=self._parse_table_alias() if parse_alias else None, 3509 sample=self._parse_table_sample(), 3510 ) 3511 3512 def _implicit_unnests_to_explicit(self, this: E) -> E: 3513 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3514 3515 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3516 for i, join in enumerate(this.args.get("joins") or []): 3517 table = join.this 3518 normalized_table = table.copy() 3519 normalized_table.meta["maybe_column"] = True 3520 normalized_table = _norm(normalized_table, dialect=self.dialect) 3521 3522 if isinstance(table, exp.Table) and not join.args.get("on"): 3523 if normalized_table.parts[0].name in refs: 3524 table_as_column = table.to_column() 3525 unnest = exp.Unnest(expressions=[table_as_column]) 3526 3527 # Table.to_column creates a parent Alias node that we want to convert to 3528 # a TableAlias and attach to the Unnest, so it matches the parser's output 3529 if isinstance(table.args.get("alias"), exp.TableAlias): 3530 table_as_column.replace(table_as_column.this) 3531 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3532 3533 table.replace(unnest) 3534 3535 refs.add(normalized_table.alias_or_name) 3536 3537 return this 3538 3539 def _parse_query_modifiers( 3540 self, this: t.Optional[exp.Expression] 3541 ) -> t.Optional[exp.Expression]: 3542 if isinstance(this, self.MODIFIABLES): 3543 for join in self._parse_joins(): 3544 this.append("joins", join) 3545 for lateral in iter(self._parse_lateral, None): 3546 this.append("laterals", lateral) 3547 3548 while True: 3549 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3550 modifier_token = self._curr 3551 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3552 key, expression = parser(self) 3553 3554 if expression: 3555 if this.args.get(key): 3556 self.raise_error( 3557 f"Found multiple '{modifier_token.text.upper()}' clauses", 3558 token=modifier_token, 3559 ) 3560 3561 this.set(key, expression) 3562 if key == "limit": 3563 offset = expression.args.pop("offset", None) 3564 3565 if offset: 3566 offset = exp.Offset(expression=offset) 3567 this.set("offset", offset) 3568 3569 limit_by_expressions = expression.expressions 3570 
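# For the comma form of LIMIT (e.g. ClickHouse-style `LIMIT 1, 2 BY a`), the leading
# value was popped off the Limit node into a standalone exp.Offset above; the
# LIMIT BY expressions are carried over to that Offset in the two statements below.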
expression.set("expressions", None) 3571 offset.set("expressions", limit_by_expressions) 3572 continue 3573 break 3574 3575 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3576 this = self._implicit_unnests_to_explicit(this) 3577 3578 return this 3579 3580 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3581 start = self._curr 3582 while self._curr: 3583 self._advance() 3584 3585 end = self._tokens[self._index - 1] 3586 return exp.Hint(expressions=[self._find_sql(start, end)]) 3587 3588 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3589 return self._parse_function_call() 3590 3591 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3592 start_index = self._index 3593 should_fallback_to_string = False 3594 3595 hints = [] 3596 try: 3597 for hint in iter( 3598 lambda: self._parse_csv( 3599 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3600 ), 3601 [], 3602 ): 3603 hints.extend(hint) 3604 except ParseError: 3605 should_fallback_to_string = True 3606 3607 if should_fallback_to_string or self._curr: 3608 self._retreat(start_index) 3609 return self._parse_hint_fallback_to_string() 3610 3611 return self.expression(exp.Hint, expressions=hints) 3612 3613 def _parse_hint(self) -> t.Optional[exp.Hint]: 3614 if self._match(TokenType.HINT) and self._prev_comments: 3615 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3616 3617 return None 3618 3619 def _parse_into(self) -> t.Optional[exp.Into]: 3620 if not self._match(TokenType.INTO): 3621 return None 3622 3623 temp = self._match(TokenType.TEMPORARY) 3624 unlogged = self._match_text_seq("UNLOGGED") 3625 self._match(TokenType.TABLE) 3626 3627 return self.expression( 3628 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3629 ) 3630 3631 def _parse_from( 3632 self, 3633 joins: bool = False, 3634 skip_from_token: bool = False, 3635 consume_pipe: bool = False, 3636 ) -> t.Optional[exp.From]: 3637 if not skip_from_token and not self._match(TokenType.FROM): 3638 return None 3639 3640 return self.expression( 3641 exp.From, 3642 comments=self._prev_comments, 3643 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3644 ) 3645 3646 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3647 return self.expression( 3648 exp.MatchRecognizeMeasure, 3649 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3650 this=self._parse_expression(), 3651 ) 3652 3653 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3654 if not self._match(TokenType.MATCH_RECOGNIZE): 3655 return None 3656 3657 self._match_l_paren() 3658 3659 partition = self._parse_partition_by() 3660 order = self._parse_order() 3661 3662 measures = ( 3663 self._parse_csv(self._parse_match_recognize_measure) 3664 if self._match_text_seq("MEASURES") 3665 else None 3666 ) 3667 3668 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3669 rows = exp.var("ONE ROW PER MATCH") 3670 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3671 text = "ALL ROWS PER MATCH" 3672 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3673 text += " SHOW EMPTY MATCHES" 3674 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3675 text += " OMIT EMPTY MATCHES" 3676 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3677 text += " WITH UNMATCHED ROWS" 3678 rows = exp.var(text) 3679 else: 3680 rows = None 3681 3682 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3683 text = "AFTER 
MATCH SKIP" 3684 if self._match_text_seq("PAST", "LAST", "ROW"): 3685 text += " PAST LAST ROW" 3686 elif self._match_text_seq("TO", "NEXT", "ROW"): 3687 text += " TO NEXT ROW" 3688 elif self._match_text_seq("TO", "FIRST"): 3689 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3690 elif self._match_text_seq("TO", "LAST"): 3691 text += f" TO LAST {self._advance_any().text}" # type: ignore 3692 after = exp.var(text) 3693 else: 3694 after = None 3695 3696 if self._match_text_seq("PATTERN"): 3697 self._match_l_paren() 3698 3699 if not self._curr: 3700 self.raise_error("Expecting )", self._curr) 3701 3702 paren = 1 3703 start = self._curr 3704 3705 while self._curr and paren > 0: 3706 if self._curr.token_type == TokenType.L_PAREN: 3707 paren += 1 3708 if self._curr.token_type == TokenType.R_PAREN: 3709 paren -= 1 3710 3711 end = self._prev 3712 self._advance() 3713 3714 if paren > 0: 3715 self.raise_error("Expecting )", self._curr) 3716 3717 pattern = exp.var(self._find_sql(start, end)) 3718 else: 3719 pattern = None 3720 3721 define = ( 3722 self._parse_csv(self._parse_name_as_expression) 3723 if self._match_text_seq("DEFINE") 3724 else None 3725 ) 3726 3727 self._match_r_paren() 3728 3729 return self.expression( 3730 exp.MatchRecognize, 3731 partition_by=partition, 3732 order=order, 3733 measures=measures, 3734 rows=rows, 3735 after=after, 3736 pattern=pattern, 3737 define=define, 3738 alias=self._parse_table_alias(), 3739 ) 3740 3741 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3742 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3743 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3744 cross_apply = False 3745 3746 if cross_apply is not None: 3747 this = self._parse_select(table=True) 3748 view = None 3749 outer = None 3750 elif self._match(TokenType.LATERAL): 3751 this = self._parse_select(table=True) 3752 view = self._match(TokenType.VIEW) 3753 outer = self._match(TokenType.OUTER) 3754 else: 3755 return None 3756 3757 if not this: 3758 this = ( 3759 self._parse_unnest() 3760 or self._parse_function() 3761 or self._parse_id_var(any_token=False) 3762 ) 3763 3764 while self._match(TokenType.DOT): 3765 this = exp.Dot( 3766 this=this, 3767 expression=self._parse_function() or self._parse_id_var(any_token=False), 3768 ) 3769 3770 ordinality: t.Optional[bool] = None 3771 3772 if view: 3773 table = self._parse_id_var(any_token=False) 3774 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3775 table_alias: t.Optional[exp.TableAlias] = self.expression( 3776 exp.TableAlias, this=table, columns=columns 3777 ) 3778 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3779 # We move the alias from the lateral's child node to the lateral itself 3780 table_alias = this.args["alias"].pop() 3781 else: 3782 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3783 table_alias = self._parse_table_alias() 3784 3785 return self.expression( 3786 exp.Lateral, 3787 this=this, 3788 view=view, 3789 outer=outer, 3790 alias=table_alias, 3791 cross_apply=cross_apply, 3792 ordinality=ordinality, 3793 ) 3794 3795 def _parse_join_parts( 3796 self, 3797 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3798 return ( 3799 self._match_set(self.JOIN_METHODS) and self._prev, 3800 self._match_set(self.JOIN_SIDES) and self._prev, 3801 self._match_set(self.JOIN_KINDS) and self._prev, 3802 ) 3803 3804 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3805 def 
_parse_column_as_identifier() -> t.Optional[exp.Expression]: 3806 this = self._parse_column() 3807 if isinstance(this, exp.Column): 3808 return this.this 3809 return this 3810 3811 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3812 3813 def _parse_join( 3814 self, skip_join_token: bool = False, parse_bracket: bool = False 3815 ) -> t.Optional[exp.Join]: 3816 if self._match(TokenType.COMMA): 3817 table = self._try_parse(self._parse_table) 3818 cross_join = self.expression(exp.Join, this=table) if table else None 3819 3820 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3821 cross_join.set("kind", "CROSS") 3822 3823 return cross_join 3824 3825 index = self._index 3826 method, side, kind = self._parse_join_parts() 3827 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3828 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3829 join_comments = self._prev_comments 3830 3831 if not skip_join_token and not join: 3832 self._retreat(index) 3833 kind = None 3834 method = None 3835 side = None 3836 3837 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3838 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3839 3840 if not skip_join_token and not join and not outer_apply and not cross_apply: 3841 return None 3842 3843 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3844 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3845 kwargs["expressions"] = self._parse_csv( 3846 lambda: self._parse_table(parse_bracket=parse_bracket) 3847 ) 3848 3849 if method: 3850 kwargs["method"] = method.text 3851 if side: 3852 kwargs["side"] = side.text 3853 if kind: 3854 kwargs["kind"] = kind.text 3855 if hint: 3856 kwargs["hint"] = hint 3857 3858 if self._match(TokenType.MATCH_CONDITION): 3859 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3860 3861 if self._match(TokenType.ON): 3862 kwargs["on"] = self._parse_assignment() 3863 elif self._match(TokenType.USING): 3864 kwargs["using"] = self._parse_using_identifiers() 3865 elif ( 3866 not method 3867 and not (outer_apply or cross_apply) 3868 and not isinstance(kwargs["this"], exp.Unnest) 3869 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3870 ): 3871 index = self._index 3872 joins: t.Optional[list] = list(self._parse_joins()) 3873 3874 if joins and self._match(TokenType.ON): 3875 kwargs["on"] = self._parse_assignment() 3876 elif joins and self._match(TokenType.USING): 3877 kwargs["using"] = self._parse_using_identifiers() 3878 else: 3879 joins = None 3880 self._retreat(index) 3881 3882 kwargs["this"].set("joins", joins if joins else None) 3883 3884 kwargs["pivots"] = self._parse_pivots() 3885 3886 comments = [c for token in (method, side, kind) if token for c in token.comments] 3887 comments = (join_comments or []) + comments 3888 3889 if ( 3890 self.ADD_JOIN_ON_TRUE 3891 and not kwargs.get("on") 3892 and not kwargs.get("using") 3893 and not kwargs.get("method") 3894 and kwargs.get("kind") in (None, "INNER", "OUTER") 3895 ): 3896 kwargs["on"] = exp.true() 3897 3898 return self.expression(exp.Join, comments=comments, **kwargs) 3899 3900 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3901 this = self._parse_assignment() 3902 3903 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3904 return this 3905 3906 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3907 return 
self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3908 3909 return this 3910 3911 def _parse_index_params(self) -> exp.IndexParameters: 3912 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3913 3914 if self._match(TokenType.L_PAREN, advance=False): 3915 columns = self._parse_wrapped_csv(self._parse_with_operator) 3916 else: 3917 columns = None 3918 3919 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3920 partition_by = self._parse_partition_by() 3921 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3922 tablespace = ( 3923 self._parse_var(any_token=True) 3924 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3925 else None 3926 ) 3927 where = self._parse_where() 3928 3929 on = self._parse_field() if self._match(TokenType.ON) else None 3930 3931 return self.expression( 3932 exp.IndexParameters, 3933 using=using, 3934 columns=columns, 3935 include=include, 3936 partition_by=partition_by, 3937 where=where, 3938 with_storage=with_storage, 3939 tablespace=tablespace, 3940 on=on, 3941 ) 3942 3943 def _parse_index( 3944 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3945 ) -> t.Optional[exp.Index]: 3946 if index or anonymous: 3947 unique = None 3948 primary = None 3949 amp = None 3950 3951 self._match(TokenType.ON) 3952 self._match(TokenType.TABLE) # hive 3953 table = self._parse_table_parts(schema=True) 3954 else: 3955 unique = self._match(TokenType.UNIQUE) 3956 primary = self._match_text_seq("PRIMARY") 3957 amp = self._match_text_seq("AMP") 3958 3959 if not self._match(TokenType.INDEX): 3960 return None 3961 3962 index = self._parse_id_var() 3963 table = None 3964 3965 params = self._parse_index_params() 3966 3967 return self.expression( 3968 exp.Index, 3969 this=index, 3970 table=table, 3971 unique=unique, 3972 primary=primary, 3973 amp=amp, 3974 params=params, 3975 ) 3976 3977 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3978 hints: t.List[exp.Expression] = [] 3979 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3980 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3981 hints.append( 3982 self.expression( 3983 exp.WithTableHint, 3984 expressions=self._parse_csv( 3985 lambda: self._parse_function() or self._parse_var(any_token=True) 3986 ), 3987 ) 3988 ) 3989 self._match_r_paren() 3990 else: 3991 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3992 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3993 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3994 3995 self._match_set((TokenType.INDEX, TokenType.KEY)) 3996 if self._match(TokenType.FOR): 3997 hint.set("target", self._advance_any() and self._prev.text.upper()) 3998 3999 hint.set("expressions", self._parse_wrapped_id_vars()) 4000 hints.append(hint) 4001 4002 return hints or None 4003 4004 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4005 return ( 4006 (not schema and self._parse_function(optional_parens=False)) 4007 or self._parse_id_var(any_token=False) 4008 or self._parse_string_as_identifier() 4009 or self._parse_placeholder() 4010 ) 4011 4012 def _parse_table_parts( 4013 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4014 ) -> exp.Table: 4015 catalog = None 4016 db = None 4017 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4018 4019 while self._match(TokenType.DOT): 4020 
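# e.g. "a.b.c" resolves to catalog=a, db=b, this=c; once all three slots are
# filled, any further dotted parts are nested into exp.Dot, as handled just below.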
if catalog: 4021 # This allows nesting the table in arbitrarily many dot expressions if needed 4022 table = self.expression( 4023 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4024 ) 4025 else: 4026 catalog = db 4027 db = table 4028 # "" used for tsql FROM a..b case 4029 table = self._parse_table_part(schema=schema) or "" 4030 4031 if ( 4032 wildcard 4033 and self._is_connected() 4034 and (isinstance(table, exp.Identifier) or not table) 4035 and self._match(TokenType.STAR) 4036 ): 4037 if isinstance(table, exp.Identifier): 4038 table.args["this"] += "*" 4039 else: 4040 table = exp.Identifier(this="*") 4041 4042 # We bubble up comments from the Identifier to the Table 4043 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4044 4045 if is_db_reference: 4046 catalog = db 4047 db = table 4048 table = None 4049 4050 if not table and not is_db_reference: 4051 self.raise_error(f"Expected table name but got {self._curr}") 4052 if not db and is_db_reference: 4053 self.raise_error(f"Expected database name but got {self._curr}") 4054 4055 table = self.expression( 4056 exp.Table, 4057 comments=comments, 4058 this=table, 4059 db=db, 4060 catalog=catalog, 4061 ) 4062 4063 changes = self._parse_changes() 4064 if changes: 4065 table.set("changes", changes) 4066 4067 at_before = self._parse_historical_data() 4068 if at_before: 4069 table.set("when", at_before) 4070 4071 pivots = self._parse_pivots() 4072 if pivots: 4073 table.set("pivots", pivots) 4074 4075 return table 4076 4077 def _parse_table( 4078 self, 4079 schema: bool = False, 4080 joins: bool = False, 4081 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4082 parse_bracket: bool = False, 4083 is_db_reference: bool = False, 4084 parse_partition: bool = False, 4085 consume_pipe: bool = False, 4086 ) -> t.Optional[exp.Expression]: 4087 lateral = self._parse_lateral() 4088 if lateral: 4089 return lateral 4090 4091 unnest = self._parse_unnest() 4092 if unnest: 4093 return unnest 4094 4095 values = self._parse_derived_table_values() 4096 if values: 4097 return values 4098 4099 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4100 if subquery: 4101 if not subquery.args.get("pivots"): 4102 subquery.set("pivots", self._parse_pivots()) 4103 return subquery 4104 4105 bracket = parse_bracket and self._parse_bracket(None) 4106 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4107 4108 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4109 self._parse_table 4110 ) 4111 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4112 4113 only = self._match(TokenType.ONLY) 4114 4115 this = t.cast( 4116 exp.Expression, 4117 bracket 4118 or rows_from 4119 or self._parse_bracket( 4120 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4121 ), 4122 ) 4123 4124 if only: 4125 this.set("only", only) 4126 4127 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4128 self._match_text_seq("*") 4129 4130 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4131 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4132 this.set("partition", self._parse_partition()) 4133 4134 if schema: 4135 return self._parse_schema(this=this) 4136 4137 version = self._parse_version() 4138 4139 if version: 4140 this.set("version", version) 4141 4142 if self.dialect.ALIAS_POST_TABLESAMPLE: 4143 this.set("sample", self._parse_table_sample()) 4144 
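# When ALIAS_POST_TABLESAMPLE is set (e.g. Hive-style `FROM tbl TABLESAMPLE (...) alias`),
# the sample has already been consumed above, before the alias; otherwise it is
# parsed after the alias, hints and pivots further down.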
4145 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4146 if alias: 4147 this.set("alias", alias) 4148 4149 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4150 return self.expression( 4151 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4152 ) 4153 4154 this.set("hints", self._parse_table_hints()) 4155 4156 if not this.args.get("pivots"): 4157 this.set("pivots", self._parse_pivots()) 4158 4159 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4160 this.set("sample", self._parse_table_sample()) 4161 4162 if joins: 4163 for join in self._parse_joins(): 4164 this.append("joins", join) 4165 4166 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4167 this.set("ordinality", True) 4168 this.set("alias", self._parse_table_alias()) 4169 4170 return this 4171 4172 def _parse_version(self) -> t.Optional[exp.Version]: 4173 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4174 this = "TIMESTAMP" 4175 elif self._match(TokenType.VERSION_SNAPSHOT): 4176 this = "VERSION" 4177 else: 4178 return None 4179 4180 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4181 kind = self._prev.text.upper() 4182 start = self._parse_bitwise() 4183 self._match_texts(("TO", "AND")) 4184 end = self._parse_bitwise() 4185 expression: t.Optional[exp.Expression] = self.expression( 4186 exp.Tuple, expressions=[start, end] 4187 ) 4188 elif self._match_text_seq("CONTAINED", "IN"): 4189 kind = "CONTAINED IN" 4190 expression = self.expression( 4191 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4192 ) 4193 elif self._match(TokenType.ALL): 4194 kind = "ALL" 4195 expression = None 4196 else: 4197 self._match_text_seq("AS", "OF") 4198 kind = "AS OF" 4199 expression = self._parse_type() 4200 4201 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4202 4203 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4204 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4205 index = self._index 4206 historical_data = None 4207 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4208 this = self._prev.text.upper() 4209 kind = ( 4210 self._match(TokenType.L_PAREN) 4211 and self._match_texts(self.HISTORICAL_DATA_KIND) 4212 and self._prev.text.upper() 4213 ) 4214 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4215 4216 if expression: 4217 self._match_r_paren() 4218 historical_data = self.expression( 4219 exp.HistoricalData, this=this, kind=kind, expression=expression 4220 ) 4221 else: 4222 self._retreat(index) 4223 4224 return historical_data 4225 4226 def _parse_changes(self) -> t.Optional[exp.Changes]: 4227 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4228 return None 4229 4230 information = self._parse_var(any_token=True) 4231 self._match_r_paren() 4232 4233 return self.expression( 4234 exp.Changes, 4235 information=information, 4236 at_before=self._parse_historical_data(), 4237 end=self._parse_historical_data(), 4238 ) 4239 4240 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4241 if not self._match(TokenType.UNNEST): 4242 return None 4243 4244 expressions = self._parse_wrapped_csv(self._parse_equality) 4245 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4246 4247 alias = self._parse_table_alias() if with_alias else None 4248 4249 if alias: 4250 if self.dialect.UNNEST_COLUMN_ONLY: 4251 if alias.args.get("columns"): 4252 self.raise_error("Unexpected extra column alias in unnest.") 4253 4254 
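# In column-only dialects (e.g. BigQuery's `UNNEST(arr) AS x`), the alias names
# the produced column rather than the table, so it is shifted into `columns` below.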
alias.set("columns", [alias.this]) 4255 alias.set("this", None) 4256 4257 columns = alias.args.get("columns") or [] 4258 if offset and len(expressions) < len(columns): 4259 offset = columns.pop() 4260 4261 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4262 self._match(TokenType.ALIAS) 4263 offset = self._parse_id_var( 4264 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4265 ) or exp.to_identifier("offset") 4266 4267 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4268 4269 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4270 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4271 if not is_derived and not ( 4272 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4273 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4274 ): 4275 return None 4276 4277 expressions = self._parse_csv(self._parse_value) 4278 alias = self._parse_table_alias() 4279 4280 if is_derived: 4281 self._match_r_paren() 4282 4283 return self.expression( 4284 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4285 ) 4286 4287 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4288 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4289 as_modifier and self._match_text_seq("USING", "SAMPLE") 4290 ): 4291 return None 4292 4293 bucket_numerator = None 4294 bucket_denominator = None 4295 bucket_field = None 4296 percent = None 4297 size = None 4298 seed = None 4299 4300 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4301 matched_l_paren = self._match(TokenType.L_PAREN) 4302 4303 if self.TABLESAMPLE_CSV: 4304 num = None 4305 expressions = self._parse_csv(self._parse_primary) 4306 else: 4307 expressions = None 4308 num = ( 4309 self._parse_factor() 4310 if self._match(TokenType.NUMBER, advance=False) 4311 else self._parse_primary() or self._parse_placeholder() 4312 ) 4313 4314 if self._match_text_seq("BUCKET"): 4315 bucket_numerator = self._parse_number() 4316 self._match_text_seq("OUT", "OF") 4317 bucket_denominator = bucket_denominator = self._parse_number() 4318 self._match(TokenType.ON) 4319 bucket_field = self._parse_field() 4320 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4321 percent = num 4322 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4323 size = num 4324 else: 4325 percent = num 4326 4327 if matched_l_paren: 4328 self._match_r_paren() 4329 4330 if self._match(TokenType.L_PAREN): 4331 method = self._parse_var(upper=True) 4332 seed = self._match(TokenType.COMMA) and self._parse_number() 4333 self._match_r_paren() 4334 elif self._match_texts(("SEED", "REPEATABLE")): 4335 seed = self._parse_wrapped(self._parse_number) 4336 4337 if not method and self.DEFAULT_SAMPLING_METHOD: 4338 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4339 4340 return self.expression( 4341 exp.TableSample, 4342 expressions=expressions, 4343 method=method, 4344 bucket_numerator=bucket_numerator, 4345 bucket_denominator=bucket_denominator, 4346 bucket_field=bucket_field, 4347 percent=percent, 4348 size=size, 4349 seed=seed, 4350 ) 4351 4352 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4353 return list(iter(self._parse_pivot, None)) or None 4354 4355 def _parse_joins(self) -> t.Iterator[exp.Join]: 4356 return iter(self._parse_join, None) 4357 4358 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4359 if not self._match(TokenType.INTO): 4360 return None 
4361 4362 return self.expression( 4363 exp.UnpivotColumns, 4364 this=self._match_text_seq("NAME") and self._parse_column(), 4365 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4366 ) 4367 4368 # https://duckdb.org/docs/sql/statements/pivot 4369 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4370 def _parse_on() -> t.Optional[exp.Expression]: 4371 this = self._parse_bitwise() 4372 4373 if self._match(TokenType.IN): 4374 # PIVOT ... ON col IN (row_val1, row_val2) 4375 return self._parse_in(this) 4376 if self._match(TokenType.ALIAS, advance=False): 4377 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4378 return self._parse_alias(this) 4379 4380 return this 4381 4382 this = self._parse_table() 4383 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4384 into = self._parse_unpivot_columns() 4385 using = self._match(TokenType.USING) and self._parse_csv( 4386 lambda: self._parse_alias(self._parse_function()) 4387 ) 4388 group = self._parse_group() 4389 4390 return self.expression( 4391 exp.Pivot, 4392 this=this, 4393 expressions=expressions, 4394 using=using, 4395 group=group, 4396 unpivot=is_unpivot, 4397 into=into, 4398 ) 4399 4400 def _parse_pivot_in(self) -> exp.In: 4401 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4402 this = self._parse_select_or_expression() 4403 4404 self._match(TokenType.ALIAS) 4405 alias = self._parse_bitwise() 4406 if alias: 4407 if isinstance(alias, exp.Column) and not alias.db: 4408 alias = alias.this 4409 return self.expression(exp.PivotAlias, this=this, alias=alias) 4410 4411 return this 4412 4413 value = self._parse_column() 4414 4415 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4416 self.raise_error("Expecting IN (") 4417 4418 if self._match(TokenType.ANY): 4419 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4420 else: 4421 exprs = self._parse_csv(_parse_aliased_expression) 4422 4423 self._match_r_paren() 4424 return self.expression(exp.In, this=value, expressions=exprs) 4425 4426 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4427 func = self._parse_function() 4428 if not func: 4429 if self._prev and self._prev.token_type == TokenType.COMMA: 4430 return None 4431 self.raise_error("Expecting an aggregation function in PIVOT") 4432 4433 return self._parse_alias(func) 4434 4435 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4436 index = self._index 4437 include_nulls = None 4438 4439 if self._match(TokenType.PIVOT): 4440 unpivot = False 4441 elif self._match(TokenType.UNPIVOT): 4442 unpivot = True 4443 4444 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4445 if self._match_text_seq("INCLUDE", "NULLS"): 4446 include_nulls = True 4447 elif self._match_text_seq("EXCLUDE", "NULLS"): 4448 include_nulls = False 4449 else: 4450 return None 4451 4452 expressions = [] 4453 4454 if not self._match(TokenType.L_PAREN): 4455 self._retreat(index) 4456 return None 4457 4458 if unpivot: 4459 expressions = self._parse_csv(self._parse_column) 4460 else: 4461 expressions = self._parse_csv(self._parse_pivot_aggregation) 4462 4463 if not expressions: 4464 self.raise_error("Failed to parse PIVOT's aggregation list") 4465 4466 if not self._match(TokenType.FOR): 4467 self.raise_error("Expecting FOR") 4468 4469 fields = [] 4470 while True: 4471 field = self._try_parse(self._parse_pivot_in) 4472 if not field: 4473 break 4474 fields.append(field) 4475 4476 
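# Optional fallback for empty pivot cells, e.g. Snowflake:
#   PIVOT (SUM(amount) FOR month IN (ANY ORDER BY month) DEFAULT ON NULL (0))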
default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4477 self._parse_bitwise 4478 ) 4479 4480 group = self._parse_group() 4481 4482 self._match_r_paren() 4483 4484 pivot = self.expression( 4485 exp.Pivot, 4486 expressions=expressions, 4487 fields=fields, 4488 unpivot=unpivot, 4489 include_nulls=include_nulls, 4490 default_on_null=default_on_null, 4491 group=group, 4492 ) 4493 4494 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4495 pivot.set("alias", self._parse_table_alias()) 4496 4497 if not unpivot: 4498 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4499 4500 columns: t.List[exp.Expression] = [] 4501 all_fields = [] 4502 for pivot_field in pivot.fields: 4503 pivot_field_expressions = pivot_field.expressions 4504 4505 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4506 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4507 continue 4508 4509 all_fields.append( 4510 [ 4511 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4512 for fld in pivot_field_expressions 4513 ] 4514 ) 4515 4516 if all_fields: 4517 if names: 4518 all_fields.append(names) 4519 4520 # Generate all possible combinations of the pivot columns 4521 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4522 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4523 for fld_parts_tuple in itertools.product(*all_fields): 4524 fld_parts = list(fld_parts_tuple) 4525 4526 if names and self.PREFIXED_PIVOT_COLUMNS: 4527 # Move the "name" to the front of the list 4528 fld_parts.insert(0, fld_parts.pop(-1)) 4529 4530 columns.append(exp.to_identifier("_".join(fld_parts))) 4531 4532 pivot.set("columns", columns) 4533 4534 return pivot 4535 4536 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4537 return [agg.alias for agg in aggregations if agg.alias] 4538 4539 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4540 if not skip_where_token and not self._match(TokenType.PREWHERE): 4541 return None 4542 4543 return self.expression( 4544 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4545 ) 4546 4547 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4548 if not skip_where_token and not self._match(TokenType.WHERE): 4549 return None 4550 4551 return self.expression( 4552 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4553 ) 4554 4555 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4556 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4557 return None 4558 comments = self._prev_comments 4559 4560 elements: t.Dict[str, t.Any] = defaultdict(list) 4561 4562 if self._match(TokenType.ALL): 4563 elements["all"] = True 4564 elif self._match(TokenType.DISTINCT): 4565 elements["all"] = False 4566 4567 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4568 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4569 4570 while True: 4571 index = self._index 4572 4573 elements["expressions"].extend( 4574 self._parse_csv( 4575 lambda: None 4576 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4577 else self._parse_assignment() 4578 ) 4579 ) 4580 4581 before_with_index = self._index 4582 with_prefix = self._match(TokenType.WITH) 4583 4584 if 
self._match(TokenType.ROLLUP): 4585 elements["rollup"].append( 4586 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4587 ) 4588 elif self._match(TokenType.CUBE): 4589 elements["cube"].append( 4590 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4591 ) 4592 elif self._match(TokenType.GROUPING_SETS): 4593 elements["grouping_sets"].append( 4594 self.expression( 4595 exp.GroupingSets, 4596 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4597 ) 4598 ) 4599 elif self._match_text_seq("TOTALS"): 4600 elements["totals"] = True # type: ignore 4601 4602 if before_with_index <= self._index <= before_with_index + 1: 4603 self._retreat(before_with_index) 4604 break 4605 4606 if index == self._index: 4607 break 4608 4609 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4610 4611 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4612 return self.expression( 4613 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4614 ) 4615 4616 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4617 if self._match(TokenType.L_PAREN): 4618 grouping_set = self._parse_csv(self._parse_column) 4619 self._match_r_paren() 4620 return self.expression(exp.Tuple, expressions=grouping_set) 4621 4622 return self._parse_column() 4623 4624 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4625 if not skip_having_token and not self._match(TokenType.HAVING): 4626 return None 4627 return self.expression( 4628 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4629 ) 4630 4631 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4632 if not self._match(TokenType.QUALIFY): 4633 return None 4634 return self.expression(exp.Qualify, this=self._parse_assignment()) 4635 4636 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4637 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4638 exp.Prior, this=self._parse_bitwise() 4639 ) 4640 connect = self._parse_assignment() 4641 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4642 return connect 4643 4644 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4645 if skip_start_token: 4646 start = None 4647 elif self._match(TokenType.START_WITH): 4648 start = self._parse_assignment() 4649 else: 4650 return None 4651 4652 self._match(TokenType.CONNECT_BY) 4653 nocycle = self._match_text_seq("NOCYCLE") 4654 connect = self._parse_connect_with_prior() 4655 4656 if not start and self._match(TokenType.START_WITH): 4657 start = self._parse_assignment() 4658 4659 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4660 4661 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4662 this = self._parse_id_var(any_token=True) 4663 if self._match(TokenType.ALIAS): 4664 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4665 return this 4666 4667 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4668 if self._match_text_seq("INTERPOLATE"): 4669 return self._parse_wrapped_csv(self._parse_name_as_expression) 4670 return None 4671 4672 def _parse_order( 4673 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4674 ) -> t.Optional[exp.Expression]: 4675 siblings = None 4676 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4677 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4678 return this 4679 4680 siblings = True 4681 
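# ORDER SIBLINGS BY (Oracle) sorts rows within each level of a hierarchical
# CONNECT BY query rather than across the whole result set.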
4682 return self.expression( 4683 exp.Order, 4684 comments=self._prev_comments, 4685 this=this, 4686 expressions=self._parse_csv(self._parse_ordered), 4687 siblings=siblings, 4688 ) 4689 4690 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4691 if not self._match(token): 4692 return None 4693 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4694 4695 def _parse_ordered( 4696 self, parse_method: t.Optional[t.Callable] = None 4697 ) -> t.Optional[exp.Ordered]: 4698 this = parse_method() if parse_method else self._parse_assignment() 4699 if not this: 4700 return None 4701 4702 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4703 this = exp.var("ALL") 4704 4705 asc = self._match(TokenType.ASC) 4706 desc = self._match(TokenType.DESC) or (asc and False) 4707 4708 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4709 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4710 4711 nulls_first = is_nulls_first or False 4712 explicitly_null_ordered = is_nulls_first or is_nulls_last 4713 4714 if ( 4715 not explicitly_null_ordered 4716 and ( 4717 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4718 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4719 ) 4720 and self.dialect.NULL_ORDERING != "nulls_are_last" 4721 ): 4722 nulls_first = True 4723 4724 if self._match_text_seq("WITH", "FILL"): 4725 with_fill = self.expression( 4726 exp.WithFill, 4727 **{ # type: ignore 4728 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4729 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4730 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4731 "interpolate": self._parse_interpolate(), 4732 }, 4733 ) 4734 else: 4735 with_fill = None 4736 4737 return self.expression( 4738 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4739 ) 4740 4741 def _parse_limit_options(self) -> exp.LimitOptions: 4742 percent = self._match(TokenType.PERCENT) 4743 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4744 self._match_text_seq("ONLY") 4745 with_ties = self._match_text_seq("WITH", "TIES") 4746 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4747 4748 def _parse_limit( 4749 self, 4750 this: t.Optional[exp.Expression] = None, 4751 top: bool = False, 4752 skip_limit_token: bool = False, 4753 ) -> t.Optional[exp.Expression]: 4754 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4755 comments = self._prev_comments 4756 if top: 4757 limit_paren = self._match(TokenType.L_PAREN) 4758 expression = self._parse_term() if limit_paren else self._parse_number() 4759 4760 if limit_paren: 4761 self._match_r_paren() 4762 4763 limit_options = self._parse_limit_options() 4764 else: 4765 limit_options = None 4766 expression = self._parse_term() 4767 4768 if self._match(TokenType.COMMA): 4769 offset = expression 4770 expression = self._parse_term() 4771 else: 4772 offset = None 4773 4774 limit_exp = self.expression( 4775 exp.Limit, 4776 this=this, 4777 expression=expression, 4778 offset=offset, 4779 comments=comments, 4780 limit_options=limit_options, 4781 expressions=self._parse_limit_by(), 4782 ) 4783 4784 return limit_exp 4785 4786 if self._match(TokenType.FETCH): 4787 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4788 direction = self._prev.text.upper() if direction else "FIRST" 4789 4790 count = self._parse_field(tokens=self.FETCH_TOKENS) 4791 4792 return 
self.expression( 4793 exp.Fetch, 4794 direction=direction, 4795 count=count, 4796 limit_options=self._parse_limit_options(), 4797 ) 4798 4799 return this 4800 4801 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4802 if not self._match(TokenType.OFFSET): 4803 return this 4804 4805 count = self._parse_term() 4806 self._match_set((TokenType.ROW, TokenType.ROWS)) 4807 4808 return self.expression( 4809 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4810 ) 4811 4812 def _can_parse_limit_or_offset(self) -> bool: 4813 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4814 return False 4815 4816 index = self._index 4817 result = bool( 4818 self._try_parse(self._parse_limit, retreat=True) 4819 or self._try_parse(self._parse_offset, retreat=True) 4820 ) 4821 self._retreat(index) 4822 return result 4823 4824 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4825 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4826 4827 def _parse_locks(self) -> t.List[exp.Lock]: 4828 locks = [] 4829 while True: 4830 update, key = None, None 4831 if self._match_text_seq("FOR", "UPDATE"): 4832 update = True 4833 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4834 "LOCK", "IN", "SHARE", "MODE" 4835 ): 4836 update = False 4837 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4838 update, key = False, True 4839 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4840 update, key = True, True 4841 else: 4842 break 4843 4844 expressions = None 4845 if self._match_text_seq("OF"): 4846 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4847 4848 wait: t.Optional[bool | exp.Expression] = None 4849 if self._match_text_seq("NOWAIT"): 4850 wait = True 4851 elif self._match_text_seq("WAIT"): 4852 wait = self._parse_primary() 4853 elif self._match_text_seq("SKIP", "LOCKED"): 4854 wait = False 4855 4856 locks.append( 4857 self.expression( 4858 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4859 ) 4860 ) 4861 4862 return locks 4863 4864 def parse_set_operation( 4865 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4866 ) -> t.Optional[exp.Expression]: 4867 start = self._index 4868 _, side_token, kind_token = self._parse_join_parts() 4869 4870 side = side_token.text if side_token else None 4871 kind = kind_token.text if kind_token else None 4872 4873 if not self._match_set(self.SET_OPERATIONS): 4874 self._retreat(start) 4875 return None 4876 4877 token_type = self._prev.token_type 4878 4879 if token_type == TokenType.UNION: 4880 operation: t.Type[exp.SetOperation] = exp.Union 4881 elif token_type == TokenType.EXCEPT: 4882 operation = exp.Except 4883 else: 4884 operation = exp.Intersect 4885 4886 comments = self._prev.comments 4887 4888 if self._match(TokenType.DISTINCT): 4889 distinct: t.Optional[bool] = True 4890 elif self._match(TokenType.ALL): 4891 distinct = False 4892 else: 4893 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4894 if distinct is None: 4895 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4896 4897 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4898 "STRICT", "CORRESPONDING" 4899 ) 4900 if self._match_text_seq("CORRESPONDING"): 4901 by_name = True 4902 if not side and not kind: 4903 kind = "INNER" 4904 4905 on_column_list = None 4906 if by_name and self._match_texts(("ON", "BY")): 4907 on_column_list = 
self._parse_wrapped_csv(self._parse_column) 4908 4909 expression = self._parse_select( 4910 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4911 ) 4912 4913 return self.expression( 4914 operation, 4915 comments=comments, 4916 this=this, 4917 distinct=distinct, 4918 by_name=by_name, 4919 expression=expression, 4920 side=side, 4921 kind=kind, 4922 on=on_column_list, 4923 ) 4924 4925 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4926 while this: 4927 setop = self.parse_set_operation(this) 4928 if not setop: 4929 break 4930 this = setop 4931 4932 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4933 expression = this.expression 4934 4935 if expression: 4936 for arg in self.SET_OP_MODIFIERS: 4937 expr = expression.args.get(arg) 4938 if expr: 4939 this.set(arg, expr.pop()) 4940 4941 return this 4942 4943 def _parse_expression(self) -> t.Optional[exp.Expression]: 4944 return self._parse_alias(self._parse_assignment()) 4945 4946 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4947 this = self._parse_disjunction() 4948 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4949 # This allows us to parse <non-identifier token> := <expr> 4950 this = exp.column( 4951 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4952 ) 4953 4954 while self._match_set(self.ASSIGNMENT): 4955 if isinstance(this, exp.Column) and len(this.parts) == 1: 4956 this = this.this 4957 4958 this = self.expression( 4959 self.ASSIGNMENT[self._prev.token_type], 4960 this=this, 4961 comments=self._prev_comments, 4962 expression=self._parse_assignment(), 4963 ) 4964 4965 return this 4966 4967 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4968 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4969 4970 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4971 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4972 4973 def _parse_equality(self) -> t.Optional[exp.Expression]: 4974 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4975 4976 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4977 return self._parse_tokens(self._parse_range, self.COMPARISON) 4978 4979 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4980 this = this or self._parse_bitwise() 4981 negate = self._match(TokenType.NOT) 4982 4983 if self._match_set(self.RANGE_PARSERS): 4984 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4985 if not expression: 4986 return this 4987 4988 this = expression 4989 elif self._match(TokenType.ISNULL): 4990 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4991 4992 # Postgres supports ISNULL and NOTNULL for conditions. 
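# e.g. `expr ISNULL` is shorthand for `expr IS NULL` and `expr NOTNULL` for
# `expr IS NOT NULL`, hence the extra exp.Not wrapper in the branch below.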
4993 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4994 if self._match(TokenType.NOTNULL): 4995 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4996 this = self.expression(exp.Not, this=this) 4997 4998 if negate: 4999 this = self._negate_range(this) 5000 5001 if self._match(TokenType.IS): 5002 this = self._parse_is(this) 5003 5004 return this 5005 5006 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5007 if not this: 5008 return this 5009 5010 return self.expression(exp.Not, this=this) 5011 5012 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5013 index = self._index - 1 5014 negate = self._match(TokenType.NOT) 5015 5016 if self._match_text_seq("DISTINCT", "FROM"): 5017 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5018 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5019 5020 if self._match(TokenType.JSON): 5021 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5022 5023 if self._match_text_seq("WITH"): 5024 _with = True 5025 elif self._match_text_seq("WITHOUT"): 5026 _with = False 5027 else: 5028 _with = None 5029 5030 unique = self._match(TokenType.UNIQUE) 5031 self._match_text_seq("KEYS") 5032 expression: t.Optional[exp.Expression] = self.expression( 5033 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5034 ) 5035 else: 5036 expression = self._parse_primary() or self._parse_null() 5037 if not expression: 5038 self._retreat(index) 5039 return None 5040 5041 this = self.expression(exp.Is, this=this, expression=expression) 5042 return self.expression(exp.Not, this=this) if negate else this 5043 5044 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5045 unnest = self._parse_unnest(with_alias=False) 5046 if unnest: 5047 this = self.expression(exp.In, this=this, unnest=unnest) 5048 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5049 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5050 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5051 5052 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5053 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5054 else: 5055 this = self.expression(exp.In, this=this, expressions=expressions) 5056 5057 if matched_l_paren: 5058 self._match_r_paren(this) 5059 elif not self._match(TokenType.R_BRACKET, expression=this): 5060 self.raise_error("Expecting ]") 5061 else: 5062 this = self.expression(exp.In, this=this, field=self._parse_column()) 5063 5064 return this 5065 5066 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5067 symmetric = None 5068 if self._match_text_seq("SYMMETRIC"): 5069 symmetric = True 5070 elif self._match_text_seq("ASYMMETRIC"): 5071 symmetric = False 5072 5073 low = self._parse_bitwise() 5074 self._match(TokenType.AND) 5075 high = self._parse_bitwise() 5076 5077 return self.expression( 5078 exp.Between, 5079 this=this, 5080 low=low, 5081 high=high, 5082 symmetric=symmetric, 5083 ) 5084 5085 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5086 if not self._match(TokenType.ESCAPE): 5087 return this 5088 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5089 5090 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5091 index = self._index 5092 5093 if not 
self._match(TokenType.INTERVAL) and match_interval: 5094 return None 5095 5096 if self._match(TokenType.STRING, advance=False): 5097 this = self._parse_primary() 5098 else: 5099 this = self._parse_term() 5100 5101 if not this or ( 5102 isinstance(this, exp.Column) 5103 and not this.table 5104 and not this.this.quoted 5105 and this.name.upper() == "IS" 5106 ): 5107 self._retreat(index) 5108 return None 5109 5110 unit = self._parse_function() or ( 5111 not self._match(TokenType.ALIAS, advance=False) 5112 and self._parse_var(any_token=True, upper=True) 5113 ) 5114 5115 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5116 # each INTERVAL expression into this canonical form so it's easy to transpile 5117 if this and this.is_number: 5118 this = exp.Literal.string(this.to_py()) 5119 elif this and this.is_string: 5120 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5121 if parts and unit: 5122 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5123 unit = None 5124 self._retreat(self._index - 1) 5125 5126 if len(parts) == 1: 5127 this = exp.Literal.string(parts[0][0]) 5128 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5129 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5130 unit = self.expression( 5131 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5132 ) 5133 5134 interval = self.expression(exp.Interval, this=this, unit=unit) 5135 5136 index = self._index 5137 self._match(TokenType.PLUS) 5138 5139 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5140 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5141 return self.expression( 5142 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5143 ) 5144 5145 self._retreat(index) 5146 return interval 5147 5148 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5149 this = self._parse_term() 5150 5151 while True: 5152 if self._match_set(self.BITWISE): 5153 this = self.expression( 5154 self.BITWISE[self._prev.token_type], 5155 this=this, 5156 expression=self._parse_term(), 5157 ) 5158 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5159 this = self.expression( 5160 exp.DPipe, 5161 this=this, 5162 expression=self._parse_term(), 5163 safe=not self.dialect.STRICT_STRING_CONCAT, 5164 ) 5165 elif self._match(TokenType.DQMARK): 5166 this = self.expression( 5167 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5168 ) 5169 elif self._match_pair(TokenType.LT, TokenType.LT): 5170 this = self.expression( 5171 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5172 ) 5173 elif self._match_pair(TokenType.GT, TokenType.GT): 5174 this = self.expression( 5175 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5176 ) 5177 else: 5178 break 5179 5180 return this 5181 5182 def _parse_term(self) -> t.Optional[exp.Expression]: 5183 this = self._parse_factor() 5184 5185 while self._match_set(self.TERM): 5186 klass = self.TERM[self._prev.token_type] 5187 comments = self._prev_comments 5188 expression = self._parse_factor() 5189 5190 this = self.expression(klass, this=this, comments=comments, expression=expression) 5191 5192 if isinstance(this, exp.Collate): 5193 expr = this.expression 5194 5195 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5196 # fallback to Identifier / Var 5197 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5198 ident = expr.this 5199 if 
isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BigQuery's inline constructor <type>(<values>), e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
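            # Illustration of the two cases: an explicit "DECIMAL(38, 0)" in the SQL leaves an index
            # difference greater than 1, so the branch below keeps the parsed DataType, whereas a bare
            # "DECIMAL" whose expressions were filled in by a TYPE_CONVERTERS entry (difference of
            # exactly 1) falls through to the retreat and is re-parsed as a Column/Identifier.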
5284 if data_type.expressions and index2 - index > 1: 5285 self._retreat(index2) 5286 return self._parse_column_ops(data_type) 5287 5288 self._retreat(index) 5289 5290 if fallback_to_identifier: 5291 return self._parse_id_var() 5292 5293 this = self._parse_column() 5294 return this and self._parse_column_ops(this) 5295 5296 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5297 this = self._parse_type() 5298 if not this: 5299 return None 5300 5301 if isinstance(this, exp.Column) and not this.table: 5302 this = exp.var(this.name.upper()) 5303 5304 return self.expression( 5305 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5306 ) 5307 5308 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5309 type_name = identifier.name 5310 5311 while self._match(TokenType.DOT): 5312 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5313 5314 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5315 5316 def _parse_types( 5317 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5318 ) -> t.Optional[exp.Expression]: 5319 index = self._index 5320 5321 this: t.Optional[exp.Expression] = None 5322 prefix = self._match_text_seq("SYSUDTLIB", ".") 5323 5324 if self._match_set(self.TYPE_TOKENS): 5325 type_token = self._prev.token_type 5326 else: 5327 type_token = None 5328 identifier = allow_identifiers and self._parse_id_var( 5329 any_token=False, tokens=(TokenType.VAR,) 5330 ) 5331 if isinstance(identifier, exp.Identifier): 5332 try: 5333 tokens = self.dialect.tokenize(identifier.name) 5334 except TokenError: 5335 tokens = None 5336 5337 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5338 type_token = tokens[0].token_type 5339 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5340 this = self._parse_user_defined_type(identifier) 5341 else: 5342 self._retreat(self._index - 1) 5343 return None 5344 else: 5345 return None 5346 5347 if type_token == TokenType.PSEUDO_TYPE: 5348 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5349 5350 if type_token == TokenType.OBJECT_IDENTIFIER: 5351 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5352 5353 # https://materialize.com/docs/sql/types/map/ 5354 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5355 key_type = self._parse_types( 5356 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5357 ) 5358 if not self._match(TokenType.FARROW): 5359 self._retreat(index) 5360 return None 5361 5362 value_type = self._parse_types( 5363 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5364 ) 5365 if not self._match(TokenType.R_BRACKET): 5366 self._retreat(index) 5367 return None 5368 5369 return exp.DataType( 5370 this=exp.DataType.Type.MAP, 5371 expressions=[key_type, value_type], 5372 nested=True, 5373 prefix=prefix, 5374 ) 5375 5376 nested = type_token in self.NESTED_TYPE_TOKENS 5377 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5378 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5379 expressions = None 5380 maybe_func = False 5381 5382 if self._match(TokenType.L_PAREN): 5383 if is_struct: 5384 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5385 elif nested: 5386 expressions = self._parse_csv( 5387 lambda: self._parse_types( 5388 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5389 ) 5390 ) 5391 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5392 this = expressions[0] 5393 this.set("nullable", True) 5394 self._match_r_paren() 5395 return this 5396 elif type_token in self.ENUM_TYPE_TOKENS: 5397 expressions = self._parse_csv(self._parse_equality) 5398 elif is_aggregate: 5399 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5400 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5401 ) 5402 if not func_or_ident: 5403 return None 5404 expressions = [func_or_ident] 5405 if self._match(TokenType.COMMA): 5406 expressions.extend( 5407 self._parse_csv( 5408 lambda: self._parse_types( 5409 check_func=check_func, 5410 schema=schema, 5411 allow_identifiers=allow_identifiers, 5412 ) 5413 ) 5414 ) 5415 else: 5416 expressions = self._parse_csv(self._parse_type_size) 5417 5418 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5419 if type_token == TokenType.VECTOR and len(expressions) == 2: 5420 expressions = self._parse_vector_expressions(expressions) 5421 5422 if not self._match(TokenType.R_PAREN): 5423 self._retreat(index) 5424 return None 5425 5426 maybe_func = True 5427 5428 values: t.Optional[t.List[exp.Expression]] = None 5429 5430 if nested and self._match(TokenType.LT): 5431 if is_struct: 5432 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5433 else: 5434 expressions = self._parse_csv( 5435 lambda: self._parse_types( 5436 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5437 ) 5438 ) 5439 5440 if not self._match(TokenType.GT): 5441 self.raise_error("Expecting >") 5442 5443 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5444 values = self._parse_csv(self._parse_assignment) 5445 if not values and is_struct: 5446 values = None 5447 self._retreat(self._index - 1) 5448 else: 5449 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5450 5451 if type_token in self.TIMESTAMPS: 5452 if self._match_text_seq("WITH", "TIME", "ZONE"): 5453 maybe_func = False 5454 tz_type = ( 5455 exp.DataType.Type.TIMETZ 5456 if type_token in self.TIMES 5457 else exp.DataType.Type.TIMESTAMPTZ 5458 ) 5459 this = exp.DataType(this=tz_type, expressions=expressions) 5460 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5461 maybe_func = False 5462 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5463 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5464 maybe_func = False 5465 elif type_token == TokenType.INTERVAL: 5466 unit = self._parse_var(upper=True) 5467 if unit: 5468 if self._match_text_seq("TO"): 5469 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5470 5471 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5472 else: 5473 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5474 elif type_token == TokenType.VOID: 5475 this = exp.DataType(this=exp.DataType.Type.NULL) 5476 5477 if maybe_func and check_func: 5478 index2 = self._index 5479 peek = self._parse_string() 5480 5481 if not peek: 5482 self._retreat(index) 5483 return None 5484 5485 self._retreat(index2) 5486 5487 if not this: 5488 if self._match_text_seq("UNSIGNED"): 5489 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5490 if not unsigned_type_token: 5491 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5492 5493 type_token = unsigned_type_token or type_token 5494 5495 this = exp.DataType( 5496 this=exp.DataType.Type[type_token.value], 5497 
expressions=expressions, 5498 nested=nested, 5499 prefix=prefix, 5500 ) 5501 5502 # Empty arrays/structs are allowed 5503 if values is not None: 5504 cls = exp.Struct if is_struct else exp.Array 5505 this = exp.cast(cls(expressions=values), this, copy=False) 5506 5507 elif expressions: 5508 this.set("expressions", expressions) 5509 5510 # https://materialize.com/docs/sql/types/list/#type-name 5511 while self._match(TokenType.LIST): 5512 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5513 5514 index = self._index 5515 5516 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5517 matched_array = self._match(TokenType.ARRAY) 5518 5519 while self._curr: 5520 datatype_token = self._prev.token_type 5521 matched_l_bracket = self._match(TokenType.L_BRACKET) 5522 5523 if (not matched_l_bracket and not matched_array) or ( 5524 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5525 ): 5526 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5527 # not to be confused with the fixed size array parsing 5528 break 5529 5530 matched_array = False 5531 values = self._parse_csv(self._parse_assignment) or None 5532 if ( 5533 values 5534 and not schema 5535 and ( 5536 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5537 ) 5538 ): 5539 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5540 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5541 self._retreat(index) 5542 break 5543 5544 this = exp.DataType( 5545 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5546 ) 5547 self._match(TokenType.R_BRACKET) 5548 5549 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5550 converter = self.TYPE_CONVERTERS.get(this.this) 5551 if converter: 5552 this = converter(t.cast(exp.DataType, this)) 5553 5554 return this 5555 5556 def _parse_vector_expressions( 5557 self, expressions: t.List[exp.Expression] 5558 ) -> t.List[exp.Expression]: 5559 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5560 5561 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5562 index = self._index 5563 5564 if ( 5565 self._curr 5566 and self._next 5567 and self._curr.token_type in self.TYPE_TOKENS 5568 and self._next.token_type in self.TYPE_TOKENS 5569 ): 5570 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5571 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5572 this = self._parse_id_var() 5573 else: 5574 this = ( 5575 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5576 or self._parse_id_var() 5577 ) 5578 5579 self._match(TokenType.COLON) 5580 5581 if ( 5582 type_required 5583 and not isinstance(this, exp.DataType) 5584 and not self._match_set(self.TYPE_TOKENS, advance=False) 5585 ): 5586 self._retreat(index) 5587 return self._parse_types() 5588 5589 return self._parse_column_def(this) 5590 5591 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5592 if not self._match_text_seq("AT", "TIME", "ZONE"): 5593 return this 5594 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5595 5596 def _parse_column(self) -> t.Optional[exp.Expression]: 5597 this = self._parse_column_reference() 5598 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5599 5600 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5601 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5602 5603 return column 5604 5605 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5606 this = self._parse_field() 5607 if ( 5608 not this 5609 and self._match(TokenType.VALUES, advance=False) 5610 and self.VALUES_FOLLOWED_BY_PAREN 5611 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5612 ): 5613 this = self._parse_id_var() 5614 5615 if isinstance(this, exp.Identifier): 5616 # We bubble up comments from the Identifier to the Column 5617 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5618 5619 return this 5620 5621 def _parse_colon_as_variant_extract( 5622 self, this: t.Optional[exp.Expression] 5623 ) -> t.Optional[exp.Expression]: 5624 casts = [] 5625 json_path = [] 5626 escape = None 5627 5628 while self._match(TokenType.COLON): 5629 start_index = self._index 5630 5631 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5632 path = self._parse_column_ops( 5633 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5634 ) 5635 5636 # The cast :: operator has a lower precedence than the extraction operator :, so 5637 # we rearrange the AST appropriately to avoid casting the JSON path 5638 while isinstance(path, exp.Cast): 5639 casts.append(path.to) 5640 path = path.this 5641 5642 if casts: 5643 dcolon_offset = next( 5644 i 5645 for i, t in enumerate(self._tokens[start_index:]) 5646 if t.token_type == TokenType.DCOLON 5647 ) 5648 end_token = self._tokens[start_index + dcolon_offset - 1] 5649 else: 5650 end_token = self._prev 5651 5652 if path: 5653 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5654 # it'll roundtrip to a string literal in GET_PATH 5655 if isinstance(path, exp.Identifier) and path.quoted: 5656 escape = True 5657 5658 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5659 5660 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5661 # Databricks transforms it back to the colon/dot notation 5662 if json_path: 5663 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5664 5665 if json_path_expr: 5666 json_path_expr.set("escape", escape) 5667 5668 this = self.expression( 5669 exp.JSONExtract, 5670 this=this, 5671 expression=json_path_expr, 5672 variant_extract=True, 5673 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5674 ) 5675 5676 while casts: 5677 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5678 5679 return this 5680 5681 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5682 return self._parse_types() 5683 5684 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5685 this = self._parse_bracket(this) 5686 5687 while self._match_set(self.COLUMN_OPERATORS): 5688 op_token = self._prev.token_type 5689 op = self.COLUMN_OPERATORS.get(op_token) 5690 5691 if op_token in self.CAST_COLUMN_OPERATORS: 5692 field = self._parse_dcolon() 5693 if not field: 5694 self.raise_error("Expected type") 5695 elif op and self._curr: 5696 field = self._parse_column_reference() or self._parse_bracket() 5697 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5698 field = self._parse_column_ops(field) 5699 else: 5700 field = self._parse_field(any_token=True, anonymous_func=True) 5701 5702 # Function calls can be qualified, e.g., x.y.FOO() 5703 # This converts the final AST to a series of Dots leading to the function call 5704 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5705 if isinstance(field, (exp.Func, exp.Window)) and this: 5706 this = this.transform( 5707 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5708 ) 5709 5710 if op: 5711 this = op(self, this, field) 5712 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5713 this = self.expression( 5714 exp.Column, 5715 comments=this.comments, 5716 this=field, 5717 table=this.this, 5718 db=this.args.get("table"), 5719 catalog=this.args.get("db"), 5720 ) 5721 elif isinstance(field, exp.Window): 5722 # Move the exp.Dot's to the window's function 5723 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5724 field.set("this", window_func) 5725 this = field 5726 else: 5727 this = self.expression(exp.Dot, this=this, expression=field) 5728 5729 if field and field.comments: 5730 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5731 5732 this = self._parse_bracket(this) 5733 5734 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5735 5736 def _parse_paren(self) -> t.Optional[exp.Expression]: 5737 if not self._match(TokenType.L_PAREN): 5738 return None 5739 5740 comments = self._prev_comments 5741 query = self._parse_select() 5742 5743 if query: 5744 expressions = [query] 5745 else: 5746 expressions = self._parse_expressions() 5747 5748 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5749 5750 if not this and self._match(TokenType.R_PAREN, advance=False): 5751 this = self.expression(exp.Tuple) 5752 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5753 this = self._parse_subquery(this=this, parse_alias=False) 5754 elif isinstance(this, exp.Subquery): 5755 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5756 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5757 this = self.expression(exp.Tuple, expressions=expressions) 5758 else: 5759 this = self.expression(exp.Paren, this=this) 5760 5761 if this: 5762 this.add_comments(comments) 5763 5764 self._match_r_paren(expression=this) 5765 return this 5766 5767 def _parse_primary(self) -> t.Optional[exp.Expression]: 5768 if self._match_set(self.PRIMARY_PARSERS): 5769 token_type = self._prev.token_type 5770 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5771 5772 if token_type == TokenType.STRING: 5773 expressions = [primary] 5774 while self._match(TokenType.STRING): 5775 expressions.append(exp.Literal.string(self._prev.text)) 5776 5777 if len(expressions) > 1: 5778 return self.expression(exp.Concat, expressions=expressions) 5779 5780 return primary 5781 5782 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5783 return exp.Literal.number(f"0.{self._prev.text}") 5784 5785 return self._parse_paren() 5786 5787 def _parse_field( 5788 self, 5789 any_token: bool = False, 5790 tokens: t.Optional[t.Collection[TokenType]] = None, 5791 anonymous_func: bool = False, 5792 ) -> t.Optional[exp.Expression]: 5793 if anonymous_func: 5794 field = ( 5795 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5796 or self._parse_primary() 5797 ) 5798 else: 5799 field = self._parse_primary() or self._parse_function( 5800 anonymous=anonymous_func, any_token=any_token 5801 ) 5802 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5803 5804 def _parse_function( 5805 self, 5806 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5807 anonymous: bool = False, 5808 optional_parens: bool = True, 5809 any_token: bool = False, 5810 ) -> t.Optional[exp.Expression]: 5811 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5812 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5813 fn_syntax = False 5814 if ( 5815 self._match(TokenType.L_BRACE, advance=False) 5816 and self._next 5817 and self._next.text.upper() == "FN" 5818 ): 5819 self._advance(2) 5820 fn_syntax = True 5821 5822 func = self._parse_function_call( 5823 functions=functions, 5824 anonymous=anonymous, 5825 optional_parens=optional_parens, 5826 any_token=any_token, 5827 ) 5828 5829 if fn_syntax: 5830 self._match(TokenType.R_BRACE) 5831 5832 return func 5833 5834 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 5835 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5836 5837 def _parse_function_call( 5838 self, 5839 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5840 anonymous: bool = False, 5841 optional_parens: bool = True, 5842 any_token: bool = False, 5843 ) -> t.Optional[exp.Expression]: 5844 if not self._curr: 5845 return None 5846 5847 comments = self._curr.comments 5848 prev = self._prev 5849 token = self._curr 5850 token_type = self._curr.token_type 5851 this = self._curr.text 5852 upper = this.upper() 5853 5854 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5855 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5856 self._advance() 5857 return self._parse_window(parser(self)) 5858 5859 if not self._next or self._next.token_type != TokenType.L_PAREN: 5860 if optional_parens and 
token_type in self.NO_PAREN_FUNCTIONS: 5861 self._advance() 5862 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5863 5864 return None 5865 5866 if any_token: 5867 if token_type in self.RESERVED_TOKENS: 5868 return None 5869 elif token_type not in self.FUNC_TOKENS: 5870 return None 5871 5872 self._advance(2) 5873 5874 parser = self.FUNCTION_PARSERS.get(upper) 5875 if parser and not anonymous: 5876 this = parser(self) 5877 else: 5878 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5879 5880 if subquery_predicate: 5881 expr = None 5882 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5883 expr = self._parse_select() 5884 self._match_r_paren() 5885 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5886 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5887 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5888 self._advance(-1) 5889 expr = self._parse_bitwise() 5890 5891 if expr: 5892 return self.expression(subquery_predicate, comments=comments, this=expr) 5893 5894 if functions is None: 5895 functions = self.FUNCTIONS 5896 5897 function = functions.get(upper) 5898 known_function = function and not anonymous 5899 5900 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5901 args = self._parse_function_args(alias) 5902 5903 post_func_comments = self._curr and self._curr.comments 5904 if known_function and post_func_comments: 5905 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5906 # call we'll construct it as exp.Anonymous, even if it's "known" 5907 if any( 5908 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5909 for comment in post_func_comments 5910 ): 5911 known_function = False 5912 5913 if alias and known_function: 5914 args = self._kv_to_prop_eq(args) 5915 5916 if known_function: 5917 func_builder = t.cast(t.Callable, function) 5918 5919 if "dialect" in func_builder.__code__.co_varnames: 5920 func = func_builder(args, dialect=self.dialect) 5921 else: 5922 func = func_builder(args) 5923 5924 func = self.validate_expression(func, args) 5925 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5926 func.meta["name"] = this 5927 5928 this = func 5929 else: 5930 if token_type == TokenType.IDENTIFIER: 5931 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5932 5933 this = self.expression(exp.Anonymous, this=this, expressions=args) 5934 this = this.update_positions(token) 5935 5936 if isinstance(this, exp.Expression): 5937 this.add_comments(comments) 5938 5939 self._match_r_paren(this) 5940 return self._parse_window(this) 5941 5942 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5943 return expression 5944 5945 def _kv_to_prop_eq( 5946 self, expressions: t.List[exp.Expression], parse_map: bool = False 5947 ) -> t.List[exp.Expression]: 5948 transformed = [] 5949 5950 for index, e in enumerate(expressions): 5951 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5952 if isinstance(e, exp.Alias): 5953 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5954 5955 if not isinstance(e, exp.PropertyEQ): 5956 e = self.expression( 5957 exp.PropertyEQ, 5958 this=e.this if parse_map else exp.to_identifier(e.this.name), 5959 expression=e.expression, 5960 ) 5961 5962 if isinstance(e.this, exp.Column): 5963 e.this.replace(e.this.this) 5964 else: 5965 e = self._to_prop_eq(e, index) 5966 5967 transformed.append(e) 5968 5969 return transformed 5970 5971 def 
_parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5972 return self._parse_statement() 5973 5974 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5975 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5976 5977 def _parse_user_defined_function( 5978 self, kind: t.Optional[TokenType] = None 5979 ) -> t.Optional[exp.Expression]: 5980 this = self._parse_table_parts(schema=True) 5981 5982 if not self._match(TokenType.L_PAREN): 5983 return this 5984 5985 expressions = self._parse_csv(self._parse_function_parameter) 5986 self._match_r_paren() 5987 return self.expression( 5988 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5989 ) 5990 5991 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5992 literal = self._parse_primary() 5993 if literal: 5994 return self.expression(exp.Introducer, this=token.text, expression=literal) 5995 5996 return self._identifier_expression(token) 5997 5998 def _parse_session_parameter(self) -> exp.SessionParameter: 5999 kind = None 6000 this = self._parse_id_var() or self._parse_primary() 6001 6002 if this and self._match(TokenType.DOT): 6003 kind = this.name 6004 this = self._parse_var() or self._parse_primary() 6005 6006 return self.expression(exp.SessionParameter, this=this, kind=kind) 6007 6008 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6009 return self._parse_id_var() 6010 6011 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6012 index = self._index 6013 6014 if self._match(TokenType.L_PAREN): 6015 expressions = t.cast( 6016 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6017 ) 6018 6019 if not self._match(TokenType.R_PAREN): 6020 self._retreat(index) 6021 else: 6022 expressions = [self._parse_lambda_arg()] 6023 6024 if self._match_set(self.LAMBDAS): 6025 return self.LAMBDAS[self._prev.token_type](self, expressions) 6026 6027 self._retreat(index) 6028 6029 this: t.Optional[exp.Expression] 6030 6031 if self._match(TokenType.DISTINCT): 6032 this = self.expression( 6033 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6034 ) 6035 else: 6036 this = self._parse_select_or_expression(alias=alias) 6037 6038 return self._parse_limit( 6039 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6040 ) 6041 6042 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6043 index = self._index 6044 if not self._match(TokenType.L_PAREN): 6045 return this 6046 6047 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6048 # expr can be of both types 6049 if self._match_set(self.SELECT_START_TOKENS): 6050 self._retreat(index) 6051 return this 6052 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6053 self._match_r_paren() 6054 return self.expression(exp.Schema, this=this, expressions=args) 6055 6056 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6057 return self._parse_column_def(self._parse_field(any_token=True)) 6058 6059 def _parse_column_def( 6060 self, this: t.Optional[exp.Expression], computed_column: bool = True 6061 ) -> t.Optional[exp.Expression]: 6062 # column defs are not really columns, they're identifiers 6063 if isinstance(this, exp.Column): 6064 this = this.this 6065 6066 if not computed_column: 6067 self._match(TokenType.ALIAS) 6068 6069 kind = self._parse_types(schema=True) 6070 6071 if self._match_text_seq("FOR", "ORDINALITY"): 6072 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6073 6074 constraints: t.List[exp.Expression] = [] 6075 6076 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6077 ("ALIAS", "MATERIALIZED") 6078 ): 6079 persisted = self._prev.text.upper() == "MATERIALIZED" 6080 constraint_kind = exp.ComputedColumnConstraint( 6081 this=self._parse_assignment(), 6082 persisted=persisted or self._match_text_seq("PERSISTED"), 6083 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6084 ) 6085 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6086 elif ( 6087 kind 6088 and self._match(TokenType.ALIAS, advance=False) 6089 and ( 6090 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6091 or (self._next and self._next.token_type == TokenType.L_PAREN) 6092 ) 6093 ): 6094 self._advance() 6095 constraints.append( 6096 self.expression( 6097 exp.ColumnConstraint, 6098 kind=exp.ComputedColumnConstraint( 6099 this=self._parse_disjunction(), 6100 persisted=self._match_texts(("STORED", "VIRTUAL")) 6101 and self._prev.text.upper() == "STORED", 6102 ), 6103 ) 6104 ) 6105 6106 while True: 6107 constraint = self._parse_column_constraint() 6108 if not constraint: 6109 break 6110 constraints.append(constraint) 6111 6112 if not kind and not constraints: 6113 return this 6114 6115 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6116 6117 def _parse_auto_increment( 6118 self, 6119 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6120 start = None 6121 increment = None 6122 order = None 6123 6124 if self._match(TokenType.L_PAREN, advance=False): 6125 args = self._parse_wrapped_csv(self._parse_bitwise) 6126 start = seq_get(args, 0) 6127 increment = seq_get(args, 1) 6128 elif self._match_text_seq("START"): 6129 start = self._parse_bitwise() 6130 self._match_text_seq("INCREMENT") 6131 increment = self._parse_bitwise() 6132 if self._match_text_seq("ORDER"): 6133 order = True 6134 elif self._match_text_seq("NOORDER"): 6135 order = False 6136 6137 if start and increment: 6138 return exp.GeneratedAsIdentityColumnConstraint( 6139 start=start, increment=increment, this=False, order=order 6140 ) 6141 6142 return exp.AutoIncrementColumnConstraint() 6143 6144 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6145 if not self._match_text_seq("REFRESH"): 6146 self._retreat(self._index - 1) 6147 return None 6148 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6149 6150 def _parse_compress(self) -> exp.CompressColumnConstraint: 6151 if 
self._match(TokenType.L_PAREN, advance=False): 6152 return self.expression( 6153 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6154 ) 6155 6156 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6157 6158 def _parse_generated_as_identity( 6159 self, 6160 ) -> ( 6161 exp.GeneratedAsIdentityColumnConstraint 6162 | exp.ComputedColumnConstraint 6163 | exp.GeneratedAsRowColumnConstraint 6164 ): 6165 if self._match_text_seq("BY", "DEFAULT"): 6166 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6167 this = self.expression( 6168 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6169 ) 6170 else: 6171 self._match_text_seq("ALWAYS") 6172 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6173 6174 self._match(TokenType.ALIAS) 6175 6176 if self._match_text_seq("ROW"): 6177 start = self._match_text_seq("START") 6178 if not start: 6179 self._match(TokenType.END) 6180 hidden = self._match_text_seq("HIDDEN") 6181 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6182 6183 identity = self._match_text_seq("IDENTITY") 6184 6185 if self._match(TokenType.L_PAREN): 6186 if self._match(TokenType.START_WITH): 6187 this.set("start", self._parse_bitwise()) 6188 if self._match_text_seq("INCREMENT", "BY"): 6189 this.set("increment", self._parse_bitwise()) 6190 if self._match_text_seq("MINVALUE"): 6191 this.set("minvalue", self._parse_bitwise()) 6192 if self._match_text_seq("MAXVALUE"): 6193 this.set("maxvalue", self._parse_bitwise()) 6194 6195 if self._match_text_seq("CYCLE"): 6196 this.set("cycle", True) 6197 elif self._match_text_seq("NO", "CYCLE"): 6198 this.set("cycle", False) 6199 6200 if not identity: 6201 this.set("expression", self._parse_range()) 6202 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6203 args = self._parse_csv(self._parse_bitwise) 6204 this.set("start", seq_get(args, 0)) 6205 this.set("increment", seq_get(args, 1)) 6206 6207 self._match_r_paren() 6208 6209 return this 6210 6211 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6212 self._match_text_seq("LENGTH") 6213 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6214 6215 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6216 if self._match_text_seq("NULL"): 6217 return self.expression(exp.NotNullColumnConstraint) 6218 if self._match_text_seq("CASESPECIFIC"): 6219 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6220 if self._match_text_seq("FOR", "REPLICATION"): 6221 return self.expression(exp.NotForReplicationColumnConstraint) 6222 6223 # Unconsume the `NOT` token 6224 self._retreat(self._index - 1) 6225 return None 6226 6227 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6228 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6229 6230 procedure_option_follows = ( 6231 self._match(TokenType.WITH, advance=False) 6232 and self._next 6233 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6234 ) 6235 6236 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6237 return self.expression( 6238 exp.ColumnConstraint, 6239 this=this, 6240 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6241 ) 6242 6243 return this 6244 6245 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6246 if not self._match(TokenType.CONSTRAINT): 6247 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6248 6249 return self.expression( 6250 exp.Constraint, 6251 this=self._parse_id_var(), 6252 expressions=self._parse_unnamed_constraints(), 6253 ) 6254 6255 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6256 constraints = [] 6257 while True: 6258 constraint = self._parse_unnamed_constraint() or self._parse_function() 6259 if not constraint: 6260 break 6261 constraints.append(constraint) 6262 6263 return constraints 6264 6265 def _parse_unnamed_constraint( 6266 self, constraints: t.Optional[t.Collection[str]] = None 6267 ) -> t.Optional[exp.Expression]: 6268 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6269 constraints or self.CONSTRAINT_PARSERS 6270 ): 6271 return None 6272 6273 constraint = self._prev.text.upper() 6274 if constraint not in self.CONSTRAINT_PARSERS: 6275 self.raise_error(f"No parser found for schema constraint {constraint}.") 6276 6277 return self.CONSTRAINT_PARSERS[constraint](self) 6278 6279 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6280 return self._parse_id_var(any_token=False) 6281 6282 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6283 self._match_texts(("KEY", "INDEX")) 6284 return self.expression( 6285 exp.UniqueColumnConstraint, 6286 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6287 this=self._parse_schema(self._parse_unique_key()), 6288 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6289 on_conflict=self._parse_on_conflict(), 6290 options=self._parse_key_constraint_options(), 6291 ) 6292 6293 def _parse_key_constraint_options(self) -> t.List[str]: 6294 options = [] 6295 while True: 6296 if not self._curr: 6297 break 6298 6299 if self._match(TokenType.ON): 6300 action = None 6301 on = self._advance_any() and self._prev.text 6302 6303 if self._match_text_seq("NO", "ACTION"): 6304 action = "NO ACTION" 6305 elif self._match_text_seq("CASCADE"): 6306 action = "CASCADE" 6307 elif self._match_text_seq("RESTRICT"): 6308 action = "RESTRICT" 6309 elif self._match_pair(TokenType.SET, TokenType.NULL): 6310 action = "SET NULL" 6311 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6312 action = "SET DEFAULT" 6313 else: 6314 self.raise_error("Invalid key constraint") 6315 6316 options.append(f"ON {on} {action}") 6317 else: 6318 var = self._parse_var_from_options( 6319 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6320 ) 6321 if not var: 6322 break 6323 options.append(var.name) 6324 6325 return options 6326 6327 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6328 if match and not self._match(TokenType.REFERENCES): 6329 return None 6330 6331 expressions = None 6332 this = self._parse_table(schema=True) 6333 options = self._parse_key_constraint_options() 6334 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6335 6336 def _parse_foreign_key(self) -> exp.ForeignKey: 6337 expressions = ( 6338 self._parse_wrapped_id_vars() 6339 if not self._match(TokenType.REFERENCES, advance=False) 6340 else None 6341 ) 6342 reference = self._parse_references() 6343 on_options = {} 6344 6345 while self._match(TokenType.ON): 6346 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6347 self.raise_error("Expected DELETE or UPDATE") 6348 6349 kind = self._prev.text.lower() 6350 6351 if self._match_text_seq("NO", "ACTION"): 6352 action = "NO ACTION" 6353 elif self._match(TokenType.SET): 6354 
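                # e.g. FOREIGN KEY ... ON DELETE SET NULL / ON UPDATE SET DEFAULT lands here and
                # produces the action strings "SET NULL" / "SET DEFAULT" below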
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime literal in ODBC format. We parse the literal into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` expression, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.
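        Other ODBC escape prefixes registered in `ODBC_DATETIME_LITERALS` (such as `{ts '...'}`)
        are handled the same way, each mapping to its corresponding expression type.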
6419 6420 Reference: 6421 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6422 """ 6423 self._match(TokenType.VAR) 6424 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6425 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6426 if not self._match(TokenType.R_BRACE): 6427 self.raise_error("Expected }") 6428 return expression 6429 6430 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6431 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6432 return this 6433 6434 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6435 map_token = seq_get(self._tokens, self._index - 2) 6436 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6437 else: 6438 parse_map = False 6439 6440 bracket_kind = self._prev.token_type 6441 if ( 6442 bracket_kind == TokenType.L_BRACE 6443 and self._curr 6444 and self._curr.token_type == TokenType.VAR 6445 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6446 ): 6447 return self._parse_odbc_datetime_literal() 6448 6449 expressions = self._parse_csv( 6450 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6451 ) 6452 6453 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6454 self.raise_error("Expected ]") 6455 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6456 self.raise_error("Expected }") 6457 6458 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6459 if bracket_kind == TokenType.L_BRACE: 6460 this = self.expression( 6461 exp.Struct, 6462 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6463 ) 6464 elif not this: 6465 this = build_array_constructor( 6466 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6467 ) 6468 else: 6469 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6470 if constructor_type: 6471 return build_array_constructor( 6472 constructor_type, 6473 args=expressions, 6474 bracket_kind=bracket_kind, 6475 dialect=self.dialect, 6476 ) 6477 6478 expressions = apply_index_offset( 6479 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6480 ) 6481 this = self.expression( 6482 exp.Bracket, 6483 this=this, 6484 expressions=expressions, 6485 comments=this.pop_comments(), 6486 ) 6487 6488 self._add_comments(this) 6489 return self._parse_bracket(this) 6490 6491 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6492 if self._match(TokenType.COLON): 6493 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6494 return this 6495 6496 def _parse_case(self) -> t.Optional[exp.Expression]: 6497 if self._match(TokenType.DOT, advance=False): 6498 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6499 self._retreat(self._index - 1) 6500 return None 6501 6502 ifs = [] 6503 default = None 6504 6505 comments = self._prev_comments 6506 expression = self._parse_assignment() 6507 6508 while self._match(TokenType.WHEN): 6509 this = self._parse_assignment() 6510 self._match(TokenType.THEN) 6511 then = self._parse_assignment() 6512 ifs.append(self.expression(exp.If, this=this, true=then)) 6513 6514 if self._match(TokenType.ELSE): 6515 default = self._parse_assignment() 6516 6517 if not self._match(TokenType.END): 6518 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6519 default 
= exp.column("interval") 6520 else: 6521 self.raise_error("Expected END after CASE", self._prev) 6522 6523 return self.expression( 6524 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6525 ) 6526 6527 def _parse_if(self) -> t.Optional[exp.Expression]: 6528 if self._match(TokenType.L_PAREN): 6529 args = self._parse_csv( 6530 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6531 ) 6532 this = self.validate_expression(exp.If.from_arg_list(args), args) 6533 self._match_r_paren() 6534 else: 6535 index = self._index - 1 6536 6537 if self.NO_PAREN_IF_COMMANDS and index == 0: 6538 return self._parse_as_command(self._prev) 6539 6540 condition = self._parse_assignment() 6541 6542 if not condition: 6543 self._retreat(index) 6544 return None 6545 6546 self._match(TokenType.THEN) 6547 true = self._parse_assignment() 6548 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6549 self._match(TokenType.END) 6550 this = self.expression(exp.If, this=condition, true=true, false=false) 6551 6552 return this 6553 6554 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6555 if not self._match_text_seq("VALUE", "FOR"): 6556 self._retreat(self._index - 1) 6557 return None 6558 6559 return self.expression( 6560 exp.NextValueFor, 6561 this=self._parse_column(), 6562 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6563 ) 6564 6565 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6566 this = self._parse_function() or self._parse_var_or_string(upper=True) 6567 6568 if self._match(TokenType.FROM): 6569 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6570 6571 if not self._match(TokenType.COMMA): 6572 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6573 6574 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6575 6576 def _parse_gap_fill(self) -> exp.GapFill: 6577 self._match(TokenType.TABLE) 6578 this = self._parse_table() 6579 6580 self._match(TokenType.COMMA) 6581 args = [this, *self._parse_csv(self._parse_lambda)] 6582 6583 gap_fill = exp.GapFill.from_arg_list(args) 6584 return self.validate_expression(gap_fill, args) 6585 6586 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6587 this = self._parse_assignment() 6588 6589 if not self._match(TokenType.ALIAS): 6590 if self._match(TokenType.COMMA): 6591 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6592 6593 self.raise_error("Expected AS after CAST") 6594 6595 fmt = None 6596 to = self._parse_types() 6597 6598 default = self._match(TokenType.DEFAULT) 6599 if default: 6600 default = self._parse_bitwise() 6601 self._match_text_seq("ON", "CONVERSION", "ERROR") 6602 6603 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6604 fmt_string = self._parse_string() 6605 fmt = self._parse_at_time_zone(fmt_string) 6606 6607 if not to: 6608 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6609 if to.this in exp.DataType.TEMPORAL_TYPES: 6610 this = self.expression( 6611 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6612 this=this, 6613 format=exp.Literal.string( 6614 format_time( 6615 fmt_string.this if fmt_string else "", 6616 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6617 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6618 ) 6619 ), 6620 safe=safe, 6621 ) 6622 6623 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6624 this.set("zone", 
fmt.args["zone"]) 6625 return this 6626 elif not to: 6627 self.raise_error("Expected TYPE after CAST") 6628 elif isinstance(to, exp.Identifier): 6629 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6630 elif to.this == exp.DataType.Type.CHAR: 6631 if self._match(TokenType.CHARACTER_SET): 6632 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6633 6634 return self.build_cast( 6635 strict=strict, 6636 this=this, 6637 to=to, 6638 format=fmt, 6639 safe=safe, 6640 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6641 default=default, 6642 ) 6643 6644 def _parse_string_agg(self) -> exp.GroupConcat: 6645 if self._match(TokenType.DISTINCT): 6646 args: t.List[t.Optional[exp.Expression]] = [ 6647 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6648 ] 6649 if self._match(TokenType.COMMA): 6650 args.extend(self._parse_csv(self._parse_assignment)) 6651 else: 6652 args = self._parse_csv(self._parse_assignment) # type: ignore 6653 6654 if self._match_text_seq("ON", "OVERFLOW"): 6655 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6656 if self._match_text_seq("ERROR"): 6657 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6658 else: 6659 self._match_text_seq("TRUNCATE") 6660 on_overflow = self.expression( 6661 exp.OverflowTruncateBehavior, 6662 this=self._parse_string(), 6663 with_count=( 6664 self._match_text_seq("WITH", "COUNT") 6665 or not self._match_text_seq("WITHOUT", "COUNT") 6666 ), 6667 ) 6668 else: 6669 on_overflow = None 6670 6671 index = self._index 6672 if not self._match(TokenType.R_PAREN) and args: 6673 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6674 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6675 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6676 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6677 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6678 6679 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6680 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6681 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
6682 if not self._match_text_seq("WITHIN", "GROUP"): 6683 self._retreat(index) 6684 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6685 6686 # The corresponding match_r_paren will be called in parse_function (caller) 6687 self._match_l_paren() 6688 6689 return self.expression( 6690 exp.GroupConcat, 6691 this=self._parse_order(this=seq_get(args, 0)), 6692 separator=seq_get(args, 1), 6693 on_overflow=on_overflow, 6694 ) 6695 6696 def _parse_convert( 6697 self, strict: bool, safe: t.Optional[bool] = None 6698 ) -> t.Optional[exp.Expression]: 6699 this = self._parse_bitwise() 6700 6701 if self._match(TokenType.USING): 6702 to: t.Optional[exp.Expression] = self.expression( 6703 exp.CharacterSet, this=self._parse_var() 6704 ) 6705 elif self._match(TokenType.COMMA): 6706 to = self._parse_types() 6707 else: 6708 to = None 6709 6710 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6711 6712 def _parse_xml_table(self) -> exp.XMLTable: 6713 namespaces = None 6714 passing = None 6715 columns = None 6716 6717 if self._match_text_seq("XMLNAMESPACES", "("): 6718 namespaces = self._parse_xml_namespace() 6719 self._match_text_seq(")", ",") 6720 6721 this = self._parse_string() 6722 6723 if self._match_text_seq("PASSING"): 6724 # The BY VALUE keywords are optional and are provided for semantic clarity 6725 self._match_text_seq("BY", "VALUE") 6726 passing = self._parse_csv(self._parse_column) 6727 6728 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6729 6730 if self._match_text_seq("COLUMNS"): 6731 columns = self._parse_csv(self._parse_field_def) 6732 6733 return self.expression( 6734 exp.XMLTable, 6735 this=this, 6736 namespaces=namespaces, 6737 passing=passing, 6738 columns=columns, 6739 by_ref=by_ref, 6740 ) 6741 6742 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6743 namespaces = [] 6744 6745 while True: 6746 if self._match(TokenType.DEFAULT): 6747 uri = self._parse_string() 6748 else: 6749 uri = self._parse_alias(self._parse_string()) 6750 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6751 if not self._match(TokenType.COMMA): 6752 break 6753 6754 return namespaces 6755 6756 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6757 args = self._parse_csv(self._parse_assignment) 6758 6759 if len(args) < 3: 6760 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6761 6762 return self.expression(exp.DecodeCase, expressions=args) 6763 6764 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6765 self._match_text_seq("KEY") 6766 key = self._parse_column() 6767 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6768 self._match_text_seq("VALUE") 6769 value = self._parse_bitwise() 6770 6771 if not key and not value: 6772 return None 6773 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6774 6775 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6776 if not this or not self._match_text_seq("FORMAT", "JSON"): 6777 return this 6778 6779 return self.expression(exp.FormatJson, this=this) 6780 6781 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6782 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6783 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6784 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6785 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6786 else: 6787 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6788 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6789 6790 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6791 6792 if not empty and not error and not null: 6793 return None 6794 6795 return self.expression( 6796 exp.OnCondition, 6797 empty=empty, 6798 error=error, 6799 null=null, 6800 ) 6801 6802 def _parse_on_handling( 6803 self, on: str, *values: str 6804 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6805 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6806 for value in values: 6807 if self._match_text_seq(value, "ON", on): 6808 return f"{value} ON {on}" 6809 6810 index = self._index 6811 if self._match(TokenType.DEFAULT): 6812 default_value = self._parse_bitwise() 6813 if self._match_text_seq("ON", on): 6814 return default_value 6815 6816 self._retreat(index) 6817 6818 return None 6819 6820 @t.overload 6821 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6822 6823 @t.overload 6824 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6825 6826 def _parse_json_object(self, agg=False): 6827 star = self._parse_star() 6828 expressions = ( 6829 [star] 6830 if star 6831 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6832 ) 6833 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6834 6835 unique_keys = None 6836 if self._match_text_seq("WITH", "UNIQUE"): 6837 unique_keys = True 6838 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6839 unique_keys = False 6840 6841 self._match_text_seq("KEYS") 6842 6843 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6844 self._parse_type() 6845 ) 6846 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6847 6848 return self.expression( 6849 exp.JSONObjectAgg if agg else exp.JSONObject, 6850 expressions=expressions, 6851 null_handling=null_handling, 6852 unique_keys=unique_keys, 6853 return_type=return_type, 6854 encoding=encoding, 6855 ) 6856 6857 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6858 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6859 if not self._match_text_seq("NESTED"): 6860 this = self._parse_id_var() 6861 kind = self._parse_types(allow_identifiers=False) 6862 nested = None 6863 else: 6864 this = None 6865 kind = None 6866 nested = True 6867 6868 path = self._match_text_seq("PATH") and self._parse_string() 6869 nested_schema = nested and self._parse_json_schema() 6870 6871 return self.expression( 6872 exp.JSONColumnDef, 6873 this=this, 6874 kind=kind, 6875 path=path, 6876 nested_schema=nested_schema, 6877 ) 6878 6879 def _parse_json_schema(self) -> exp.JSONSchema: 6880 self._match_text_seq("COLUMNS") 6881 return self.expression( 6882 exp.JSONSchema, 6883 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6884 ) 6885 6886 def _parse_json_table(self) -> exp.JSONTable: 6887 this = self._parse_format_json(self._parse_bitwise()) 6888 path = self._match(TokenType.COMMA) and self._parse_string() 6889 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6890 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6891 schema = 
self._parse_json_schema() 6892 6893 return exp.JSONTable( 6894 this=this, 6895 schema=schema, 6896 path=path, 6897 error_handling=error_handling, 6898 empty_handling=empty_handling, 6899 ) 6900 6901 def _parse_match_against(self) -> exp.MatchAgainst: 6902 if self._match_text_seq("TABLE"): 6903 # parse SingleStore MATCH(TABLE ...) syntax 6904 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6905 expressions = [] 6906 table = self._parse_table() 6907 if table: 6908 expressions = [table] 6909 else: 6910 expressions = self._parse_csv(self._parse_column) 6911 6912 self._match_text_seq(")", "AGAINST", "(") 6913 6914 this = self._parse_string() 6915 6916 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6917 modifier = "IN NATURAL LANGUAGE MODE" 6918 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6919 modifier = f"{modifier} WITH QUERY EXPANSION" 6920 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6921 modifier = "IN BOOLEAN MODE" 6922 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6923 modifier = "WITH QUERY EXPANSION" 6924 else: 6925 modifier = None 6926 6927 return self.expression( 6928 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6929 ) 6930 6931 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6932 def _parse_open_json(self) -> exp.OpenJSON: 6933 this = self._parse_bitwise() 6934 path = self._match(TokenType.COMMA) and self._parse_string() 6935 6936 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6937 this = self._parse_field(any_token=True) 6938 kind = self._parse_types() 6939 path = self._parse_string() 6940 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6941 6942 return self.expression( 6943 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6944 ) 6945 6946 expressions = None 6947 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6948 self._match_l_paren() 6949 expressions = self._parse_csv(_parse_open_json_column_def) 6950 6951 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6952 6953 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6954 args = self._parse_csv(self._parse_bitwise) 6955 6956 if self._match(TokenType.IN): 6957 return self.expression( 6958 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6959 ) 6960 6961 if haystack_first: 6962 haystack = seq_get(args, 0) 6963 needle = seq_get(args, 1) 6964 else: 6965 haystack = seq_get(args, 1) 6966 needle = seq_get(args, 0) 6967 6968 return self.expression( 6969 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6970 ) 6971 6972 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6973 args = self._parse_csv(self._parse_table) 6974 return exp.JoinHint(this=func_name.upper(), expressions=args) 6975 6976 def _parse_substring(self) -> exp.Substring: 6977 # Postgres supports the form: substring(string [from int] [for int]) 6978 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6979 6980 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6981 6982 if self._match(TokenType.FROM): 6983 args.append(self._parse_bitwise()) 6984 if self._match(TokenType.FOR): 6985 if len(args) == 1: 6986 args.append(exp.Literal.number(1)) 6987 args.append(self._parse_bitwise()) 6988 6989 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6990 6991 def _parse_trim(self) 
-> exp.Trim: 6992 # https://www.w3resource.com/sql/character-functions/trim.php 6993 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6994 6995 position = None 6996 collation = None 6997 expression = None 6998 6999 if self._match_texts(self.TRIM_TYPES): 7000 position = self._prev.text.upper() 7001 7002 this = self._parse_bitwise() 7003 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7004 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7005 expression = self._parse_bitwise() 7006 7007 if invert_order: 7008 this, expression = expression, this 7009 7010 if self._match(TokenType.COLLATE): 7011 collation = self._parse_bitwise() 7012 7013 return self.expression( 7014 exp.Trim, this=this, position=position, expression=expression, collation=collation 7015 ) 7016 7017 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7018 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7019 7020 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7021 return self._parse_window(self._parse_id_var(), alias=True) 7022 7023 def _parse_respect_or_ignore_nulls( 7024 self, this: t.Optional[exp.Expression] 7025 ) -> t.Optional[exp.Expression]: 7026 if self._match_text_seq("IGNORE", "NULLS"): 7027 return self.expression(exp.IgnoreNulls, this=this) 7028 if self._match_text_seq("RESPECT", "NULLS"): 7029 return self.expression(exp.RespectNulls, this=this) 7030 return this 7031 7032 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7033 if self._match(TokenType.HAVING): 7034 self._match_texts(("MAX", "MIN")) 7035 max = self._prev.text.upper() != "MIN" 7036 return self.expression( 7037 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7038 ) 7039 7040 return this 7041 7042 def _parse_window( 7043 self, this: t.Optional[exp.Expression], alias: bool = False 7044 ) -> t.Optional[exp.Expression]: 7045 func = this 7046 comments = func.comments if isinstance(func, exp.Expression) else None 7047 7048 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7049 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7050 if self._match_text_seq("WITHIN", "GROUP"): 7051 order = self._parse_wrapped(self._parse_order) 7052 this = self.expression(exp.WithinGroup, this=this, expression=order) 7053 7054 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7055 self._match(TokenType.WHERE) 7056 this = self.expression( 7057 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7058 ) 7059 self._match_r_paren() 7060 7061 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7062 # Some dialects choose to implement and some do not. 7063 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7064 7065 # There is some code above in _parse_lambda that handles 7066 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7067 7068 # The below changes handle 7069 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 
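# Illustrative sketch (not part of the parser itself): with the rewrite below, a trailing
# IGNORE NULLS ends up wrapping the aggregate before the window is attached. Assuming
# sqlglot is installed, something along these lines:
#
#     import sqlglot
#     from sqlglot import exp
#
#     ast = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#     window = ast.find(exp.Window)
#     # window.this is expected to be an exp.IgnoreNulls node wrapping FIRST_VALUE(x)
#
# The exact tree shape may differ slightly across sqlglot versions.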
7070 7071 # Oracle allows both formats 7072 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7073 # and Snowflake chose to do the same for familiarity 7074 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7075 if isinstance(this, exp.AggFunc): 7076 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7077 7078 if ignore_respect and ignore_respect is not this: 7079 ignore_respect.replace(ignore_respect.this) 7080 this = self.expression(ignore_respect.__class__, this=this) 7081 7082 this = self._parse_respect_or_ignore_nulls(this) 7083 7084 # bigquery select from window x AS (partition by ...) 7085 if alias: 7086 over = None 7087 self._match(TokenType.ALIAS) 7088 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7089 return this 7090 else: 7091 over = self._prev.text.upper() 7092 7093 if comments and isinstance(func, exp.Expression): 7094 func.pop_comments() 7095 7096 if not self._match(TokenType.L_PAREN): 7097 return self.expression( 7098 exp.Window, 7099 comments=comments, 7100 this=this, 7101 alias=self._parse_id_var(False), 7102 over=over, 7103 ) 7104 7105 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7106 7107 first = self._match(TokenType.FIRST) 7108 if self._match_text_seq("LAST"): 7109 first = False 7110 7111 partition, order = self._parse_partition_and_order() 7112 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7113 7114 if kind: 7115 self._match(TokenType.BETWEEN) 7116 start = self._parse_window_spec() 7117 7118 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7119 exclude = ( 7120 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7121 if self._match_text_seq("EXCLUDE") 7122 else None 7123 ) 7124 7125 spec = self.expression( 7126 exp.WindowSpec, 7127 kind=kind, 7128 start=start["value"], 7129 start_side=start["side"], 7130 end=end.get("value"), 7131 end_side=end.get("side"), 7132 exclude=exclude, 7133 ) 7134 else: 7135 spec = None 7136 7137 self._match_r_paren() 7138 7139 window = self.expression( 7140 exp.Window, 7141 comments=comments, 7142 this=this, 7143 partition_by=partition, 7144 order=order, 7145 spec=spec, 7146 alias=window_alias, 7147 over=over, 7148 first=first, 7149 ) 7150 7151 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
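# Illustrative sketch (not part of the parser itself): the recursive call below lets a
# window be parsed on top of an already-built window, which is how Oracle-style
# "aggregate KEEP (...) OVER (...)" queries can be handled. A hypothetical example,
# assuming sqlglot is installed and the Oracle dialect accepts this shape:
#
#     import sqlglot
#     sqlglot.parse_one(
#         "SELECT MAX(sal) KEEP (DENSE_RANK FIRST ORDER BY hire_date) OVER (PARTITION BY dept) FROM emp",
#         read="oracle",
#     )
#
# The result is expected to nest one exp.Window inside another; details may vary by version.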
7152 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7153 return self._parse_window(window, alias=alias) 7154 7155 return window 7156 7157 def _parse_partition_and_order( 7158 self, 7159 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7160 return self._parse_partition_by(), self._parse_order() 7161 7162 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7163 self._match(TokenType.BETWEEN) 7164 7165 return { 7166 "value": ( 7167 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7168 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7169 or self._parse_type() 7170 ), 7171 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7172 } 7173 7174 def _parse_alias( 7175 self, this: t.Optional[exp.Expression], explicit: bool = False 7176 ) -> t.Optional[exp.Expression]: 7177 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7178 # so this section tries to parse the clause version and if it fails, it treats the token 7179 # as an identifier (alias) 7180 if self._can_parse_limit_or_offset(): 7181 return this 7182 7183 any_token = self._match(TokenType.ALIAS) 7184 comments = self._prev_comments or [] 7185 7186 if explicit and not any_token: 7187 return this 7188 7189 if self._match(TokenType.L_PAREN): 7190 aliases = self.expression( 7191 exp.Aliases, 7192 comments=comments, 7193 this=this, 7194 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7195 ) 7196 self._match_r_paren(aliases) 7197 return aliases 7198 7199 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7200 self.STRING_ALIASES and self._parse_string_as_identifier() 7201 ) 7202 7203 if alias: 7204 comments.extend(alias.pop_comments()) 7205 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7206 column = this.this 7207 7208 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7209 if not this.comments and column and column.comments: 7210 this.comments = column.pop_comments() 7211 7212 return this 7213 7214 def _parse_id_var( 7215 self, 7216 any_token: bool = True, 7217 tokens: t.Optional[t.Collection[TokenType]] = None, 7218 ) -> t.Optional[exp.Expression]: 7219 expression = self._parse_identifier() 7220 if not expression and ( 7221 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7222 ): 7223 quoted = self._prev.token_type == TokenType.STRING 7224 expression = self._identifier_expression(quoted=quoted) 7225 7226 return expression 7227 7228 def _parse_string(self) -> t.Optional[exp.Expression]: 7229 if self._match_set(self.STRING_PARSERS): 7230 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7231 return self._parse_placeholder() 7232 7233 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7234 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7235 if output: 7236 output.update_positions(self._prev) 7237 return output 7238 7239 def _parse_number(self) -> t.Optional[exp.Expression]: 7240 if self._match_set(self.NUMERIC_PARSERS): 7241 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7242 return self._parse_placeholder() 7243 7244 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7245 if self._match(TokenType.IDENTIFIER): 7246 return self._identifier_expression(quoted=True) 7247 return self._parse_placeholder() 7248 7249 def _parse_var( 7250 self, 7251 any_token: bool = False, 7252 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7253 upper: bool = False, 7254 ) -> t.Optional[exp.Expression]: 7255 if ( 7256 (any_token and self._advance_any()) 7257 or self._match(TokenType.VAR) 7258 or (self._match_set(tokens) if tokens else False) 7259 ): 7260 return self.expression( 7261 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7262 ) 7263 return self._parse_placeholder() 7264 7265 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7266 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7267 self._advance() 7268 return self._prev 7269 return None 7270 7271 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7272 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7273 7274 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7275 return self._parse_primary() or self._parse_var(any_token=True) 7276 7277 def _parse_null(self) -> t.Optional[exp.Expression]: 7278 if self._match_set(self.NULL_TOKENS): 7279 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7280 return self._parse_placeholder() 7281 7282 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7283 if self._match(TokenType.TRUE): 7284 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7285 if self._match(TokenType.FALSE): 7286 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7287 return self._parse_placeholder() 7288 7289 def _parse_star(self) -> t.Optional[exp.Expression]: 7290 if self._match(TokenType.STAR): 7291 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7292 return self._parse_placeholder() 7293 7294 def _parse_parameter(self) -> exp.Parameter: 7295 this = self._parse_identifier() or self._parse_primary_or_var() 7296 return self.expression(exp.Parameter, this=this) 7297 7298 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7299 if self._match_set(self.PLACEHOLDER_PARSERS): 7300 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7301 if placeholder: 7302 return placeholder 7303 self._advance(-1) 7304 return None 7305 7306 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7307 if not self._match_texts(keywords): 7308 return None 7309 if self._match(TokenType.L_PAREN, advance=False): 7310 return self._parse_wrapped_csv(self._parse_expression) 7311 7312 expression = self._parse_expression() 7313 return [expression] if expression else None 7314 7315 def _parse_csv( 7316 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7317 ) -> t.List[exp.Expression]: 7318 parse_result = parse_method() 7319 items = [parse_result] if parse_result is not None else [] 7320 7321 while self._match(sep): 7322 self._add_comments(parse_result) 7323 parse_result = parse_method() 7324 if parse_result is not None: 7325 items.append(parse_result) 7326 7327 return items 7328 7329 def _parse_tokens( 7330 self, parse_method: t.Callable, expressions: t.Dict 7331 ) -> t.Optional[exp.Expression]: 7332 this = parse_method() 7333 7334 while self._match_set(expressions): 7335 this = self.expression( 7336 expressions[self._prev.token_type], 7337 this=this, 7338 comments=self._prev_comments, 7339 expression=parse_method(), 7340 ) 7341 7342 return this 7343 7344 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7345 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7346 7347 def _parse_wrapped_csv( 7348 self, parse_method: t.Callable, 
sep: TokenType = TokenType.COMMA, optional: bool = False 7349 ) -> t.List[exp.Expression]: 7350 return self._parse_wrapped( 7351 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7352 ) 7353 7354 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7355 wrapped = self._match(TokenType.L_PAREN) 7356 if not wrapped and not optional: 7357 self.raise_error("Expecting (") 7358 parse_result = parse_method() 7359 if wrapped: 7360 self._match_r_paren() 7361 return parse_result 7362 7363 def _parse_expressions(self) -> t.List[exp.Expression]: 7364 return self._parse_csv(self._parse_expression) 7365 7366 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7367 return ( 7368 self._parse_set_operations( 7369 self._parse_alias(self._parse_assignment(), explicit=True) 7370 if alias 7371 else self._parse_assignment() 7372 ) 7373 or self._parse_select() 7374 ) 7375 7376 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7377 return self._parse_query_modifiers( 7378 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7379 ) 7380 7381 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7382 this = None 7383 if self._match_texts(self.TRANSACTION_KIND): 7384 this = self._prev.text 7385 7386 self._match_texts(("TRANSACTION", "WORK")) 7387 7388 modes = [] 7389 while True: 7390 mode = [] 7391 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7392 mode.append(self._prev.text) 7393 7394 if mode: 7395 modes.append(" ".join(mode)) 7396 if not self._match(TokenType.COMMA): 7397 break 7398 7399 return self.expression(exp.Transaction, this=this, modes=modes) 7400 7401 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7402 chain = None 7403 savepoint = None 7404 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7405 7406 self._match_texts(("TRANSACTION", "WORK")) 7407 7408 if self._match_text_seq("TO"): 7409 self._match_text_seq("SAVEPOINT") 7410 savepoint = self._parse_id_var() 7411 7412 if self._match(TokenType.AND): 7413 chain = not self._match_text_seq("NO") 7414 self._match_text_seq("CHAIN") 7415 7416 if is_rollback: 7417 return self.expression(exp.Rollback, savepoint=savepoint) 7418 7419 return self.expression(exp.Commit, chain=chain) 7420 7421 def _parse_refresh(self) -> exp.Refresh: 7422 self._match(TokenType.TABLE) 7423 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7424 7425 def _parse_column_def_with_exists(self): 7426 start = self._index 7427 self._match(TokenType.COLUMN) 7428 7429 exists_column = self._parse_exists(not_=True) 7430 expression = self._parse_field_def() 7431 7432 if not isinstance(expression, exp.ColumnDef): 7433 self._retreat(start) 7434 return None 7435 7436 expression.set("exists", exists_column) 7437 7438 return expression 7439 7440 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7441 if not self._prev.text.upper() == "ADD": 7442 return None 7443 7444 expression = self._parse_column_def_with_exists() 7445 if not expression: 7446 return None 7447 7448 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7449 if self._match_texts(("FIRST", "AFTER")): 7450 position = self._prev.text 7451 column_position = self.expression( 7452 exp.ColumnPosition, this=self._parse_column(), position=position 7453 ) 7454 expression.set("position", column_position) 7455 7456 return expression 7457 7458 def _parse_drop_column(self) -> 
t.Optional[exp.Drop | exp.Command]: 7459 drop = self._match(TokenType.DROP) and self._parse_drop() 7460 if drop and not isinstance(drop, exp.Command): 7461 drop.set("kind", drop.args.get("kind", "COLUMN")) 7462 return drop 7463 7464 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7465 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7466 return self.expression( 7467 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7468 ) 7469 7470 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7471 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7472 self._match_text_seq("ADD") 7473 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7474 return self.expression( 7475 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7476 ) 7477 7478 column_def = self._parse_add_column() 7479 if isinstance(column_def, exp.ColumnDef): 7480 return column_def 7481 7482 exists = self._parse_exists(not_=True) 7483 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7484 return self.expression( 7485 exp.AddPartition, 7486 exists=exists, 7487 this=self._parse_field(any_token=True), 7488 location=self._match_text_seq("LOCATION", advance=False) 7489 and self._parse_property(), 7490 ) 7491 7492 return None 7493 7494 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7495 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7496 or self._match_text_seq("COLUMNS") 7497 ): 7498 schema = self._parse_schema() 7499 7500 return ( 7501 ensure_list(schema) 7502 if schema 7503 else self._parse_csv(self._parse_column_def_with_exists) 7504 ) 7505 7506 return self._parse_csv(_parse_add_alteration) 7507 7508 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7509 if self._match_texts(self.ALTER_ALTER_PARSERS): 7510 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7511 7512 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7513 # keyword after ALTER we default to parsing this statement 7514 self._match(TokenType.COLUMN) 7515 column = self._parse_field(any_token=True) 7516 7517 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7518 return self.expression(exp.AlterColumn, this=column, drop=True) 7519 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7520 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 7521 if self._match(TokenType.COMMENT): 7522 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7523 if self._match_text_seq("DROP", "NOT", "NULL"): 7524 return self.expression( 7525 exp.AlterColumn, 7526 this=column, 7527 drop=True, 7528 allow_null=True, 7529 ) 7530 if self._match_text_seq("SET", "NOT", "NULL"): 7531 return self.expression( 7532 exp.AlterColumn, 7533 this=column, 7534 allow_null=False, 7535 ) 7536 7537 if self._match_text_seq("SET", "VISIBLE"): 7538 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7539 if self._match_text_seq("SET", "INVISIBLE"): 7540 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7541 7542 self._match_text_seq("SET", "DATA") 7543 self._match_text_seq("TYPE") 7544 return self.expression( 7545 exp.AlterColumn, 7546 this=column, 7547 dtype=self._parse_types(), 7548 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7549 using=self._match(TokenType.USING) and self._parse_assignment(), 7550 ) 7551 7552 def 
_parse_alter_diststyle(self) -> exp.AlterDistStyle: 7553 if self._match_texts(("ALL", "EVEN", "AUTO")): 7554 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7555 7556 self._match_text_seq("KEY", "DISTKEY") 7557 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7558 7559 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7560 if compound: 7561 self._match_text_seq("SORTKEY") 7562 7563 if self._match(TokenType.L_PAREN, advance=False): 7564 return self.expression( 7565 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7566 ) 7567 7568 self._match_texts(("AUTO", "NONE")) 7569 return self.expression( 7570 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7571 ) 7572 7573 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7574 index = self._index - 1 7575 7576 partition_exists = self._parse_exists() 7577 if self._match(TokenType.PARTITION, advance=False): 7578 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7579 7580 self._retreat(index) 7581 return self._parse_csv(self._parse_drop_column) 7582 7583 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7584 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7585 exists = self._parse_exists() 7586 old_column = self._parse_column() 7587 to = self._match_text_seq("TO") 7588 new_column = self._parse_column() 7589 7590 if old_column is None or to is None or new_column is None: 7591 return None 7592 7593 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7594 7595 self._match_text_seq("TO") 7596 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7597 7598 def _parse_alter_table_set(self) -> exp.AlterSet: 7599 alter_set = self.expression(exp.AlterSet) 7600 7601 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7602 "TABLE", "PROPERTIES" 7603 ): 7604 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7605 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7606 alter_set.set("expressions", [self._parse_assignment()]) 7607 elif self._match_texts(("LOGGED", "UNLOGGED")): 7608 alter_set.set("option", exp.var(self._prev.text.upper())) 7609 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7610 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7611 elif self._match_text_seq("LOCATION"): 7612 alter_set.set("location", self._parse_field()) 7613 elif self._match_text_seq("ACCESS", "METHOD"): 7614 alter_set.set("access_method", self._parse_field()) 7615 elif self._match_text_seq("TABLESPACE"): 7616 alter_set.set("tablespace", self._parse_field()) 7617 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7618 alter_set.set("file_format", [self._parse_field()]) 7619 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7620 alter_set.set("file_format", self._parse_wrapped_options()) 7621 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7622 alter_set.set("copy_options", self._parse_wrapped_options()) 7623 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7624 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7625 else: 7626 if self._match_text_seq("SERDE"): 7627 alter_set.set("serde", self._parse_field()) 7628 7629 properties = self._parse_wrapped(self._parse_properties, optional=True) 7630 
alter_set.set("expressions", [properties]) 7631 7632 return alter_set 7633 7634 def _parse_alter_session(self) -> exp.AlterSession: 7635 """Parse ALTER SESSION SET/UNSET statements.""" 7636 if self._match(TokenType.SET): 7637 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7638 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7639 7640 self._match_text_seq("UNSET") 7641 expressions = self._parse_csv( 7642 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7643 ) 7644 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7645 7646 def _parse_alter(self) -> exp.Alter | exp.Command: 7647 start = self._prev 7648 7649 alter_token = self._match_set(self.ALTERABLES) and self._prev 7650 if not alter_token: 7651 return self._parse_as_command(start) 7652 7653 exists = self._parse_exists() 7654 only = self._match_text_seq("ONLY") 7655 7656 if alter_token.token_type == TokenType.SESSION: 7657 this = None 7658 check = None 7659 cluster = None 7660 else: 7661 this = self._parse_table(schema=True) 7662 check = self._match_text_seq("WITH", "CHECK") 7663 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7664 7665 if self._next: 7666 self._advance() 7667 7668 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7669 if parser: 7670 actions = ensure_list(parser(self)) 7671 not_valid = self._match_text_seq("NOT", "VALID") 7672 options = self._parse_csv(self._parse_property) 7673 7674 if not self._curr and actions: 7675 return self.expression( 7676 exp.Alter, 7677 this=this, 7678 kind=alter_token.text.upper(), 7679 exists=exists, 7680 actions=actions, 7681 only=only, 7682 options=options, 7683 cluster=cluster, 7684 not_valid=not_valid, 7685 check=check, 7686 ) 7687 7688 return self._parse_as_command(start) 7689 7690 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7691 start = self._prev 7692 # https://duckdb.org/docs/sql/statements/analyze 7693 if not self._curr: 7694 return self.expression(exp.Analyze) 7695 7696 options = [] 7697 while self._match_texts(self.ANALYZE_STYLES): 7698 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7699 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7700 else: 7701 options.append(self._prev.text.upper()) 7702 7703 this: t.Optional[exp.Expression] = None 7704 inner_expression: t.Optional[exp.Expression] = None 7705 7706 kind = self._curr and self._curr.text.upper() 7707 7708 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7709 this = self._parse_table_parts() 7710 elif self._match_text_seq("TABLES"): 7711 if self._match_set((TokenType.FROM, TokenType.IN)): 7712 kind = f"{kind} {self._prev.text.upper()}" 7713 this = self._parse_table(schema=True, is_db_reference=True) 7714 elif self._match_text_seq("DATABASE"): 7715 this = self._parse_table(schema=True, is_db_reference=True) 7716 elif self._match_text_seq("CLUSTER"): 7717 this = self._parse_table() 7718 # Try matching inner expr keywords before fallback to parse table. 
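# Illustrative sketch (not part of the parser itself): the branches above and below mean
# that both a bare target and a keyword-driven inner expression can follow ANALYZE.
# Assuming sqlglot is installed, something like:
#
#     import sqlglot
#     from sqlglot import exp
#
#     sqlglot.parse_one("ANALYZE t")                                        # Presto/DuckDB style
#     sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark") # Spark style
#
# is expected to produce exp.Analyze nodes, the second carrying an inner
# exp.AnalyzeStatistics expression; exact node shapes may vary by sqlglot version.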
7719 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7720 kind = None 7721 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7722 else: 7723 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7724 kind = None 7725 this = self._parse_table_parts() 7726 7727 partition = self._try_parse(self._parse_partition) 7728 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7729 return self._parse_as_command(start) 7730 7731 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7732 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7733 "WITH", "ASYNC", "MODE" 7734 ): 7735 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7736 else: 7737 mode = None 7738 7739 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7740 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7741 7742 properties = self._parse_properties() 7743 return self.expression( 7744 exp.Analyze, 7745 kind=kind, 7746 this=this, 7747 mode=mode, 7748 partition=partition, 7749 properties=properties, 7750 expression=inner_expression, 7751 options=options, 7752 ) 7753 7754 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7755 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7756 this = None 7757 kind = self._prev.text.upper() 7758 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7759 expressions = [] 7760 7761 if not self._match_text_seq("STATISTICS"): 7762 self.raise_error("Expecting token STATISTICS") 7763 7764 if self._match_text_seq("NOSCAN"): 7765 this = "NOSCAN" 7766 elif self._match(TokenType.FOR): 7767 if self._match_text_seq("ALL", "COLUMNS"): 7768 this = "FOR ALL COLUMNS" 7769 if self._match_texts("COLUMNS"): 7770 this = "FOR COLUMNS" 7771 expressions = self._parse_csv(self._parse_column_reference) 7772 elif self._match_text_seq("SAMPLE"): 7773 sample = self._parse_number() 7774 expressions = [ 7775 self.expression( 7776 exp.AnalyzeSample, 7777 sample=sample, 7778 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7779 ) 7780 ] 7781 7782 return self.expression( 7783 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7784 ) 7785 7786 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7787 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7788 kind = None 7789 this = None 7790 expression: t.Optional[exp.Expression] = None 7791 if self._match_text_seq("REF", "UPDATE"): 7792 kind = "REF" 7793 this = "UPDATE" 7794 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7795 this = "UPDATE SET DANGLING TO NULL" 7796 elif self._match_text_seq("STRUCTURE"): 7797 kind = "STRUCTURE" 7798 if self._match_text_seq("CASCADE", "FAST"): 7799 this = "CASCADE FAST" 7800 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7801 ("ONLINE", "OFFLINE") 7802 ): 7803 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7804 expression = self._parse_into() 7805 7806 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7807 7808 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7809 this = self._prev.text.upper() 7810 if self._match_text_seq("COLUMNS"): 7811 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7812 return None 7813 7814 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7815 kind = 
self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7816 if self._match_text_seq("STATISTICS"): 7817 return self.expression(exp.AnalyzeDelete, kind=kind) 7818 return None 7819 7820 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7821 if self._match_text_seq("CHAINED", "ROWS"): 7822 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7823 return None 7824 7825 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7826 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7827 this = self._prev.text.upper() 7828 expression: t.Optional[exp.Expression] = None 7829 expressions = [] 7830 update_options = None 7831 7832 if self._match_text_seq("HISTOGRAM", "ON"): 7833 expressions = self._parse_csv(self._parse_column_reference) 7834 with_expressions = [] 7835 while self._match(TokenType.WITH): 7836 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7837 if self._match_texts(("SYNC", "ASYNC")): 7838 if self._match_text_seq("MODE", advance=False): 7839 with_expressions.append(f"{self._prev.text.upper()} MODE") 7840 self._advance() 7841 else: 7842 buckets = self._parse_number() 7843 if self._match_text_seq("BUCKETS"): 7844 with_expressions.append(f"{buckets} BUCKETS") 7845 if with_expressions: 7846 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7847 7848 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7849 TokenType.UPDATE, advance=False 7850 ): 7851 update_options = self._prev.text.upper() 7852 self._advance() 7853 elif self._match_text_seq("USING", "DATA"): 7854 expression = self.expression(exp.UsingData, this=self._parse_string()) 7855 7856 return self.expression( 7857 exp.AnalyzeHistogram, 7858 this=this, 7859 expressions=expressions, 7860 expression=expression, 7861 update_options=update_options, 7862 ) 7863 7864 def _parse_merge(self) -> exp.Merge: 7865 self._match(TokenType.INTO) 7866 target = self._parse_table() 7867 7868 if target and self._match(TokenType.ALIAS, advance=False): 7869 target.set("alias", self._parse_table_alias()) 7870 7871 self._match(TokenType.USING) 7872 using = self._parse_table() 7873 7874 self._match(TokenType.ON) 7875 on = self._parse_assignment() 7876 7877 return self.expression( 7878 exp.Merge, 7879 this=target, 7880 using=using, 7881 on=on, 7882 whens=self._parse_when_matched(), 7883 returning=self._parse_returning(), 7884 ) 7885 7886 def _parse_when_matched(self) -> exp.Whens: 7887 whens = [] 7888 7889 while self._match(TokenType.WHEN): 7890 matched = not self._match(TokenType.NOT) 7891 self._match_text_seq("MATCHED") 7892 source = ( 7893 False 7894 if self._match_text_seq("BY", "TARGET") 7895 else self._match_text_seq("BY", "SOURCE") 7896 ) 7897 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7898 7899 self._match(TokenType.THEN) 7900 7901 if self._match(TokenType.INSERT): 7902 this = self._parse_star() 7903 if this: 7904 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7905 else: 7906 then = self.expression( 7907 exp.Insert, 7908 this=exp.var("ROW") 7909 if self._match_text_seq("ROW") 7910 else self._parse_value(values=False), 7911 expression=self._match_text_seq("VALUES") and self._parse_value(), 7912 ) 7913 elif self._match(TokenType.UPDATE): 7914 expressions = self._parse_star() 7915 if expressions: 7916 then = self.expression(exp.Update, expressions=expressions) 7917 else: 7918 then = self.expression( 7919 exp.Update, 7920 
expressions=self._match(TokenType.SET) 7921 and self._parse_csv(self._parse_equality), 7922 ) 7923 elif self._match(TokenType.DELETE): 7924 then = self.expression(exp.Var, this=self._prev.text) 7925 else: 7926 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 7927 7928 whens.append( 7929 self.expression( 7930 exp.When, 7931 matched=matched, 7932 source=source, 7933 condition=condition, 7934 then=then, 7935 ) 7936 ) 7937 return self.expression(exp.Whens, expressions=whens) 7938 7939 def _parse_show(self) -> t.Optional[exp.Expression]: 7940 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 7941 if parser: 7942 return parser(self) 7943 return self._parse_as_command(self._prev) 7944 7945 def _parse_set_item_assignment( 7946 self, kind: t.Optional[str] = None 7947 ) -> t.Optional[exp.Expression]: 7948 index = self._index 7949 7950 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 7951 return self._parse_set_transaction(global_=kind == "GLOBAL") 7952 7953 left = self._parse_primary() or self._parse_column() 7954 assignment_delimiter = self._match_texts(("=", "TO")) 7955 7956 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 7957 self._retreat(index) 7958 return None 7959 7960 right = self._parse_statement() or self._parse_id_var() 7961 if isinstance(right, (exp.Column, exp.Identifier)): 7962 right = exp.var(right.name) 7963 7964 this = self.expression(exp.EQ, this=left, expression=right) 7965 return self.expression(exp.SetItem, this=this, kind=kind) 7966 7967 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 7968 self._match_text_seq("TRANSACTION") 7969 characteristics = self._parse_csv( 7970 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 7971 ) 7972 return self.expression( 7973 exp.SetItem, 7974 expressions=characteristics, 7975 kind="TRANSACTION", 7976 **{"global": global_}, # type: ignore 7977 ) 7978 7979 def _parse_set_item(self) -> t.Optional[exp.Expression]: 7980 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 7981 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 7982 7983 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 7984 index = self._index 7985 set_ = self.expression( 7986 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 7987 ) 7988 7989 if self._curr: 7990 self._retreat(index) 7991 return self._parse_as_command(self._prev) 7992 7993 return set_ 7994 7995 def _parse_var_from_options( 7996 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 7997 ) -> t.Optional[exp.Var]: 7998 start = self._curr 7999 if not start: 8000 return None 8001 8002 option = start.text.upper() 8003 continuations = options.get(option) 8004 8005 index = self._index 8006 self._advance() 8007 for keywords in continuations or []: 8008 if isinstance(keywords, str): 8009 keywords = (keywords,) 8010 8011 if self._match_text_seq(*keywords): 8012 option = f"{option} {' '.join(keywords)}" 8013 break 8014 else: 8015 if continuations or continuations is None: 8016 if raise_unmatched: 8017 self.raise_error(f"Unknown option {option}") 8018 8019 self._retreat(index) 8020 return None 8021 8022 return exp.var(option) 8023 8024 def _parse_as_command(self, start: Token) -> exp.Command: 8025 while self._curr: 8026 self._advance() 8027 text = self._find_sql(start, self._prev) 8028 size = len(start.text) 8029 self._warn_unsupported() 8030 return exp.Command(this=text[:size], 
expression=text[size:]) 8031 8032 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8033 settings = [] 8034 8035 self._match_l_paren() 8036 kind = self._parse_id_var() 8037 8038 if self._match(TokenType.L_PAREN): 8039 while True: 8040 key = self._parse_id_var() 8041 value = self._parse_primary() 8042 if not key and value is None: 8043 break 8044 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8045 self._match(TokenType.R_PAREN) 8046 8047 self._match_r_paren() 8048 8049 return self.expression( 8050 exp.DictProperty, 8051 this=this, 8052 kind=kind.this if kind else None, 8053 settings=settings, 8054 ) 8055 8056 def _parse_dict_range(self, this: str) -> exp.DictRange: 8057 self._match_l_paren() 8058 has_min = self._match_text_seq("MIN") 8059 if has_min: 8060 min = self._parse_var() or self._parse_primary() 8061 self._match_text_seq("MAX") 8062 max = self._parse_var() or self._parse_primary() 8063 else: 8064 max = self._parse_var() or self._parse_primary() 8065 min = exp.Literal.number(0) 8066 self._match_r_paren() 8067 return self.expression(exp.DictRange, this=this, min=min, max=max) 8068 8069 def _parse_comprehension( 8070 self, this: t.Optional[exp.Expression] 8071 ) -> t.Optional[exp.Comprehension]: 8072 index = self._index 8073 expression = self._parse_column() 8074 if not self._match(TokenType.IN): 8075 self._retreat(index - 1) 8076 return None 8077 iterator = self._parse_column() 8078 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8079 return self.expression( 8080 exp.Comprehension, 8081 this=this, 8082 expression=expression, 8083 iterator=iterator, 8084 condition=condition, 8085 ) 8086 8087 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8088 if self._match(TokenType.HEREDOC_STRING): 8089 return self.expression(exp.Heredoc, this=self._prev.text) 8090 8091 if not self._match_text_seq("$"): 8092 return None 8093 8094 tags = ["$"] 8095 tag_text = None 8096 8097 if self._is_connected(): 8098 self._advance() 8099 tags.append(self._prev.text.upper()) 8100 else: 8101 self.raise_error("No closing $ found") 8102 8103 if tags[-1] != "$": 8104 if self._is_connected() and self._match_text_seq("$"): 8105 tag_text = tags[-1] 8106 tags.append("$") 8107 else: 8108 self.raise_error("No closing $ found") 8109 8110 heredoc_start = self._curr 8111 8112 while self._curr: 8113 if self._match_text_seq(*tags, advance=False): 8114 this = self._find_sql(heredoc_start, self._prev) 8115 self._advance(len(tags)) 8116 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8117 8118 self._advance() 8119 8120 self.raise_error(f"No closing {''.join(tags)} found") 8121 return None 8122 8123 def _find_parser( 8124 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8125 ) -> t.Optional[t.Callable]: 8126 if not self._curr: 8127 return None 8128 8129 index = self._index 8130 this = [] 8131 while True: 8132 # The current token might be multiple words 8133 curr = self._curr.text.upper() 8134 key = curr.split(" ") 8135 this.append(curr) 8136 8137 self._advance() 8138 result, trie = in_trie(trie, key) 8139 if result == TrieResult.FAILED: 8140 break 8141 8142 if result == TrieResult.EXISTS: 8143 subparser = parsers[" ".join(this)] 8144 return subparser 8145 8146 self._retreat(index) 8147 return None 8148 8149 def _match(self, token_type, advance=True, expression=None): 8150 if not self._curr: 8151 return None 8152 8153 if self._curr.token_type == token_type: 8154 if advance: 8155 self._advance() 8156 self._add_comments(expression) 8157 return 
True 8158 8159 return None 8160 8161 def _match_set(self, types, advance=True): 8162 if not self._curr: 8163 return None 8164 8165 if self._curr.token_type in types: 8166 if advance: 8167 self._advance() 8168 return True 8169 8170 return None 8171 8172 def _match_pair(self, token_type_a, token_type_b, advance=True): 8173 if not self._curr or not self._next: 8174 return None 8175 8176 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8177 if advance: 8178 self._advance(2) 8179 return True 8180 8181 return None 8182 8183 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8184 if not self._match(TokenType.L_PAREN, expression=expression): 8185 self.raise_error("Expecting (") 8186 8187 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8188 if not self._match(TokenType.R_PAREN, expression=expression): 8189 self.raise_error("Expecting )") 8190 8191 def _match_texts(self, texts, advance=True): 8192 if ( 8193 self._curr 8194 and self._curr.token_type != TokenType.STRING 8195 and self._curr.text.upper() in texts 8196 ): 8197 if advance: 8198 self._advance() 8199 return True 8200 return None 8201 8202 def _match_text_seq(self, *texts, advance=True): 8203 index = self._index 8204 for text in texts: 8205 if ( 8206 self._curr 8207 and self._curr.token_type != TokenType.STRING 8208 and self._curr.text.upper() == text 8209 ): 8210 self._advance() 8211 else: 8212 self._retreat(index) 8213 return None 8214 8215 if not advance: 8216 self._retreat(index) 8217 8218 return True 8219 8220 def _replace_lambda( 8221 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8222 ) -> t.Optional[exp.Expression]: 8223 if not node: 8224 return node 8225 8226 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8227 8228 for column in node.find_all(exp.Column): 8229 typ = lambda_types.get(column.parts[0].name) 8230 if typ is not None: 8231 dot_or_id = column.to_dot() if column.table else column.this 8232 8233 if typ: 8234 dot_or_id = self.expression( 8235 exp.Cast, 8236 this=dot_or_id, 8237 to=typ, 8238 ) 8239 8240 parent = column.parent 8241 8242 while isinstance(parent, exp.Dot): 8243 if not isinstance(parent.parent, exp.Dot): 8244 parent.replace(dot_or_id) 8245 break 8246 parent = parent.parent 8247 else: 8248 if column is node: 8249 node = dot_or_id 8250 else: 8251 column.replace(dot_or_id) 8252 return node 8253 8254 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8255 start = self._prev 8256 8257 # Not to be confused with TRUNCATE(number, decimals) function call 8258 if self._match(TokenType.L_PAREN): 8259 self._retreat(self._index - 2) 8260 return self._parse_function() 8261 8262 # Clickhouse supports TRUNCATE DATABASE as well 8263 is_database = self._match(TokenType.DATABASE) 8264 8265 self._match(TokenType.TABLE) 8266 8267 exists = self._parse_exists(not_=False) 8268 8269 expressions = self._parse_csv( 8270 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8271 ) 8272 8273 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8274 8275 if self._match_text_seq("RESTART", "IDENTITY"): 8276 identity = "RESTART" 8277 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8278 identity = "CONTINUE" 8279 else: 8280 identity = None 8281 8282 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8283 option = self._prev.text 8284 else: 8285 option = None 8286 8287 partition = self._parse_partition() 
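# Illustrative sketch (not part of the parser itself): when every token has been consumed
# by this point, the statement becomes a structured TruncateTable node rather than falling
# back to a raw Command below. Assuming sqlglot is installed, something like:
#
#     import sqlglot
#     from sqlglot import exp
#
#     node = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE", read="postgres")
#     # node is expected to be an exp.TruncateTable with identity="RESTART" and option="CASCADE"
#
# The argument names reflect this parser and may change between versions.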
8288 8289 # Fallback case 8290 if self._curr: 8291 return self._parse_as_command(start) 8292 8293 return self.expression( 8294 exp.TruncateTable, 8295 expressions=expressions, 8296 is_database=is_database, 8297 exists=exists, 8298 cluster=cluster, 8299 identity=identity, 8300 option=option, 8301 partition=partition, 8302 ) 8303 8304 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8305 this = self._parse_ordered(self._parse_opclass) 8306 8307 if not self._match(TokenType.WITH): 8308 return this 8309 8310 op = self._parse_var(any_token=True) 8311 8312 return self.expression(exp.WithOperator, this=this, op=op) 8313 8314 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8315 self._match(TokenType.EQ) 8316 self._match(TokenType.L_PAREN) 8317 8318 opts: t.List[t.Optional[exp.Expression]] = [] 8319 option: exp.Expression | None 8320 while self._curr and not self._match(TokenType.R_PAREN): 8321 if self._match_text_seq("FORMAT_NAME", "="): 8322 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8323 option = self._parse_format_name() 8324 else: 8325 option = self._parse_property() 8326 8327 if option is None: 8328 self.raise_error("Unable to parse option") 8329 break 8330 8331 opts.append(option) 8332 8333 return opts 8334 8335 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8336 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8337 8338 options = [] 8339 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8340 option = self._parse_var(any_token=True) 8341 prev = self._prev.text.upper() 8342 8343 # Different dialects might separate options and values by white space, "=" and "AS" 8344 self._match(TokenType.EQ) 8345 self._match(TokenType.ALIAS) 8346 8347 param = self.expression(exp.CopyParameter, this=option) 8348 8349 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8350 TokenType.L_PAREN, advance=False 8351 ): 8352 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8353 param.set("expressions", self._parse_wrapped_options()) 8354 elif prev == "FILE_FORMAT": 8355 # T-SQL's external file format case 8356 param.set("expression", self._parse_field()) 8357 else: 8358 param.set("expression", self._parse_unquoted_field()) 8359 8360 options.append(param) 8361 self._match(sep) 8362 8363 return options 8364 8365 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8366 expr = self.expression(exp.Credentials) 8367 8368 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8369 expr.set("storage", self._parse_field()) 8370 if self._match_text_seq("CREDENTIALS"): 8371 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8372 creds = ( 8373 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8374 ) 8375 expr.set("credentials", creds) 8376 if self._match_text_seq("ENCRYPTION"): 8377 expr.set("encryption", self._parse_wrapped_options()) 8378 if self._match_text_seq("IAM_ROLE"): 8379 expr.set("iam_role", self._parse_field()) 8380 if self._match_text_seq("REGION"): 8381 expr.set("region", self._parse_field()) 8382 8383 return expr 8384 8385 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8386 return self._parse_field() 8387 8388 def _parse_copy(self) -> exp.Copy | exp.Command: 8389 start = self._prev 8390 8391 self._match(TokenType.INTO) 8392 8393 this = ( 8394 self._parse_select(nested=True, parse_subquery_alias=False) 8395 if self._match(TokenType.L_PAREN, advance=False) 8396 else self._parse_table(schema=True) 
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        if self._match(TokenType.EQ, advance=False):
            # Backtrack one token since we've consumed the lhs of a parameter assignment here.
            # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
            # list via `_parse_wrapped(..)` below.
            self._advance(-1)
            files = []

        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g.
        # MySQL allows names such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(exp.Concat, expressions=node.expressions, safe=True)
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(exp.Concat, expressions=args, safe=True)

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
                # remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)
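The pipe syntax handlers above rewrite each `|>` stage into a chained CTE via _build_pipe_cte, so the final result is an ordinary SELECT tree. A minimal sketch of how this surfaces through the public API, assuming a sqlglot version and a dialect (e.g. BigQuery) in which pipe syntax is enabled; the table and column names are illustrative:

import sqlglot

ast = sqlglot.parse_one(
    "FROM orders |> WHERE amount > 10 |> AGGREGATE SUM(amount) AS total GROUP BY region",
    dialect="bigquery",
)

# Each |> stage should become a __tmp<N> CTE; the tree can then be re-rendered as regular SQL.
print(ast.sql(dialect="duckdb"))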
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
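A brief usage sketch of these settings (the SQL string below is illustrative). ErrorLevel.WARN logs errors and collects them on parser.errors instead of raising, while the default IMMEDIATE raises on the first error:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a, b FROM t WHERE a > 1"
tokens = Tokenizer().tokenize(sql)

# Collect up to max_errors errors, keeping 50 characters of context around each one.
parser = Parser(error_level=ErrorLevel.WARN, error_message_context=50, max_errors=3)
expressions = parser.parse(tokens, sql)
print(expressions[0].sql())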
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
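For example (a small sketch), parsing two semicolon-separated statements yields two trees:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql)

# One syntax tree per statement
print(len(trees))  # 2
print([tree.sql() for tree in trees])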
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
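A usage sketch, assuming exp.Condition is registered in EXPRESSION_PARSERS (as in recent sqlglot versions):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "a.b > 5 AND c IS NOT NULL"
tokens = Tokenizer().tokenize(sql)

# Parse the tokens as a standalone condition instead of a full statement.
condition = Parser().parse_into(exp.Condition, tokens, sql)[0]
print(condition.sql())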
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
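check_errors is invoked internally at the end of _parse. A sketch of the resulting behavior, assuming the malformed SQL below actually trips the parser (an unclosed subquery should):

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM (SELECT b"  # unclosed subquery
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=3)

try:
    parser.parse(Tokenizer().tokenize(sql), sql)
except ParseError as e:
    # With RAISE, errors accumulate during parsing and check_errors merges up to
    # max_errors of them into a single ParseError at the end.
    print(e.errors)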
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
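As a sketch of how this builder is typically used when extending the parser; the dialect and function name below are hypothetical, not part of the library:

from sqlglot import exp, parser
from sqlglot.dialects.dialect import Dialect


class MyDialect(Dialect):  # hypothetical dialect, for illustration only
    class Parser(parser.Parser):
        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            # Build the node through self.expression so pending comments are
            # attached and mandatory arguments validated.
            "MY_FUNC": lambda self: self.expression(
                exp.Anonymous,
                this="MY_FUNC",
                expressions=self._parse_csv(self._parse_assignment),
            ),
        }


tree = MyDialect().parse("SELECT MY_FUNC(a, b) FROM t")[0]
print(tree.sql())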
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
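For instance (a sketch), a Cast node with no target type is reported unless the error level is IGNORE:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

incomplete = exp.Cast(this=exp.column("x"))  # exp.Cast requires a "to" data type

Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)     # returned as-is
Parser(error_level=ErrorLevel.IMMEDIATE).validate_expression(incomplete)  # raises ParseError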
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )